panache_parser/parser/utils/
helpers.rs1use crate::syntax::SyntaxKind;
4use rowan::GreenNodeBuilder;
5
6pub(crate) fn emit_line_tokens(builder: &mut GreenNodeBuilder<'static>, line: &str) {
9 if let Some(text) = line.strip_suffix("\r\n") {
11 builder.token(SyntaxKind::TEXT.into(), text);
12 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
13 } else if let Some(text) = line.strip_suffix('\n') {
14 builder.token(SyntaxKind::TEXT.into(), text);
15 builder.token(SyntaxKind::NEWLINE.into(), "\n");
16 } else {
17 builder.token(SyntaxKind::TEXT.into(), line);
19 }
20}
21
22pub(crate) fn emit_separator_tokens(builder: &mut GreenNodeBuilder<'static>, line: &str) {
33 let (content, newline) = strip_newline(line);
34 let bytes = content.as_bytes();
35 let mut i = 0;
36 while i < bytes.len() {
37 let b = bytes[i];
38 match b {
39 b'|' | b'+' => {
40 builder.token(SyntaxKind::TABLE_SEP_DELIM.into(), &content[i..i + 1]);
41 i += 1;
42 }
43 b':' => {
44 builder.token(SyntaxKind::TABLE_SEP_COLON.into(), &content[i..i + 1]);
45 i += 1;
46 }
47 b'-' => {
48 let start = i;
49 while i < bytes.len() && bytes[i] == b'-' {
50 i += 1;
51 }
52 builder.token(SyntaxKind::TABLE_SEP_DASHES.into(), &content[start..i]);
53 }
54 b'=' => {
55 let start = i;
56 while i < bytes.len() && bytes[i] == b'=' {
57 i += 1;
58 }
59 builder.token(SyntaxKind::TABLE_SEP_EQUALS.into(), &content[start..i]);
60 }
61 b' ' | b'\t' => {
62 let start = i;
63 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
64 i += 1;
65 }
66 builder.token(SyntaxKind::TABLE_SEP_WHITESPACE.into(), &content[start..i]);
67 }
68 _ => {
69 let start = i;
74 while i < bytes.len()
75 && !matches!(bytes[i], b'|' | b'+' | b':' | b'-' | b'=' | b' ' | b'\t')
76 {
77 i += 1;
78 }
79 builder.token(SyntaxKind::TEXT.into(), &content[start..i]);
80 }
81 }
82 }
83 if !newline.is_empty() {
84 builder.token(SyntaxKind::NEWLINE.into(), newline);
85 }
86}
87
/// Removes at most `max_spaces` leading space characters (`' '`) from `line`.
///
/// Only the ASCII space counts; tabs and other whitespace stop the stripping.
/// Returns the remaining suffix of `line`.
pub(crate) fn strip_leading_spaces_n(line: &str, max_spaces: usize) -> &str {
    let mut rest = line;
    for _ in 0..max_spaces {
        match rest.strip_prefix(' ') {
            Some(tail) => rest = tail,
            None => break,
        }
    }
    rest
}
98
99pub(crate) fn strip_leading_spaces(line: &str) -> &str {
102 strip_leading_spaces_n(line, 3)
103}
104
/// Splits a trailing line ending off `line`.
///
/// Returns `(content, newline)` where `newline` is `"\r\n"`, `"\n"`, or `""`
/// when the line has no line ending. Only a single terminator is removed, and
/// a lone `'\r'` is not treated as a line ending.
pub(crate) fn strip_newline(line: &str) -> (&str, &str) {
    // CRLF is tested first: a CRLF line also ends with LF.
    let eol_len = if line.ends_with("\r\n") {
        2
    } else if line.ends_with('\n') {
        1
    } else {
        0
    };
    line.split_at(line.len() - eol_len)
}
116
/// Removes every trailing `'\n'` and `'\r'` byte from `s`.
///
/// Unlike `strip_newline`-style helpers this trims *all* trailing line-ending
/// bytes, in any order (`"foo\n\n"` and `"foo\r\n"` both yield `"foo"`).
#[inline]
pub(crate) fn trim_end_newlines(s: &str) -> &str {
    // Index one past the last byte that is not a line-ending byte (0 if the
    // string consists solely of line-ending bytes).
    let end = s
        .as_bytes()
        .iter()
        .rposition(|&b| b != b'\n' && b != b'\r')
        .map_or(0, |last| last + 1);
    // Everything from `end` onwards is ASCII, so `end` is always a UTF-8
    // character boundary and plain (safe) slicing cannot panic; no `unsafe`
    // conversion is needed.
    &s[..end]
}
137
/// Removes every leading space (`' '`) and tab (`'\t'`) from `s`.
///
/// Other whitespace (newlines, Unicode spaces) is left untouched.
#[inline]
pub(crate) fn trim_start_spaces_tabs(s: &str) -> &str {
    // Index of the first byte that is neither a space nor a tab (the string
    // length if there is none).
    let start = s
        .as_bytes()
        .iter()
        .position(|&b| b != b' ' && b != b'\t')
        .unwrap_or(s.len());
    // Every skipped byte is ASCII, so `start` is always a UTF-8 character
    // boundary and plain (safe) slicing cannot panic; no `unsafe` conversion
    // is needed.
    &s[start..]
}
155
/// Returns `true` when `s` contains nothing but spaces, tabs, `'\n'` and
/// `'\r'`.
///
/// The empty string counts as blank.
#[inline]
pub(crate) fn is_blank_line(s: &str) -> bool {
    s.bytes()
        .all(|byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
}
166
/// Removes every trailing space (`' '`) and tab (`'\t'`) from `s`.
///
/// Other whitespace (newlines, Unicode spaces) is left untouched.
#[inline]
pub(crate) fn trim_end_spaces_tabs(s: &str) -> &str {
    // Index one past the last byte that is neither a space nor a tab (0 if
    // the string consists solely of spaces and tabs).
    let end = s
        .as_bytes()
        .iter()
        .rposition(|&b| b != b' ' && b != b'\t')
        .map_or(0, |last| last + 1);
    // Everything from `end` onwards is ASCII, so `end` is always a UTF-8
    // character boundary and plain (safe) slicing cannot panic; no `unsafe`
    // conversion is needed.
    &s[..end]
}
184
/// Splits `input` into lines, keeping each line's terminator attached.
///
/// A line ends at `'\n'`; because a `"\r\n"` terminator also ends with
/// `'\n'`, CRLF endings stay attached to their line as well. A lone `'\r'` is
/// not a line break. A final line without a terminator is returned as-is, and
/// empty input yields an empty vector. Concatenating the returned slices
/// reproduces `input` exactly.
pub(crate) fn split_lines_inclusive(input: &str) -> Vec<&str> {
    // `split_inclusive` yields nothing for "" and no trailing empty piece
    // after a final '\n', which is exactly the contract described above.
    input.split_inclusive('\n').collect()
}
221
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_strip_leading_spaces_n() {
        // Up to the limit, every leading space is removed.
        assert_eq!(strip_leading_spaces_n("   text", 3), "text");
        assert_eq!(strip_leading_spaces_n("  text", 3), "text");
        assert_eq!(strip_leading_spaces_n(" text", 3), "text");
        assert_eq!(strip_leading_spaces_n("text", 3), "text");
        // Beyond the limit, only the first three spaces are removed.
        assert_eq!(strip_leading_spaces_n("    text", 3), " text");
    }

    #[test]
    fn test_strip_newline() {
        assert_eq!(strip_newline("text\n"), ("text", "\n"));
        assert_eq!(strip_newline("text\r\n"), ("text", "\r\n"));
        assert_eq!(strip_newline("text"), ("text", ""));
    }

    #[test]
    fn test_trim_end_newlines() {
        assert_eq!(trim_end_newlines("foo\n"), "foo");
        assert_eq!(trim_end_newlines("foo\r\n"), "foo");
        assert_eq!(trim_end_newlines("foo\n\n"), "foo");
        assert_eq!(trim_end_newlines("foo"), "foo");
        assert_eq!(trim_end_newlines(""), "");
        assert_eq!(trim_end_newlines("\n"), "");
        // Multi-byte characters before the line ending must survive intact.
        assert_eq!(trim_end_newlines("föö\n"), "föö");
    }

    /// Runs `emit_separator_tokens` on `line` inside a `TABLE_SEPARATOR` node
    /// and returns the emitted tokens as `(kind, text)` pairs.
    fn separator_tokens(line: &str) -> Vec<(SyntaxKind, String)> {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
        emit_separator_tokens(&mut builder, line);
        builder.finish_node();
        let node = crate::syntax::SyntaxNode::new_root(builder.finish());
        node.children_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|t| (t.kind(), t.text().to_string()))
            .collect()
    }

    #[test]
    fn test_emit_separator_tokens_reconstruction() {
        for line in [
            // `|`-delimited with alignment colons.
            "|:--|--:|:-:|\n",
            // `+`-delimited with a colon-aligned column.
            "+------+:----:+------+\n",
            // `=` runs with a CRLF line ending.
            "+======+======+\r\n",
            // Whitespace-separated dash runs.
            "------- ------ ----------\n",
            // No trailing newline.
            ":--:",
            // Bytes outside the separator alphabet fall back to TEXT.
            "|:--|--:|?weird|\n",
        ] {
            let reconstructed: String = separator_tokens(line)
                .iter()
                .map(|(_, t)| t.as_str())
                .collect();
            assert_eq!(reconstructed, line, "round-trip failed for {line:?}");
        }
    }

    #[test]
    fn test_emit_separator_tokens_kinds() {
        use SyntaxKind::*;
        assert_eq!(
            separator_tokens("|:--|--:|\n"),
            vec![
                (TABLE_SEP_DELIM, "|".to_string()),
                (TABLE_SEP_COLON, ":".to_string()),
                (TABLE_SEP_DASHES, "--".to_string()),
                (TABLE_SEP_DELIM, "|".to_string()),
                (TABLE_SEP_DASHES, "--".to_string()),
                (TABLE_SEP_COLON, ":".to_string()),
                (TABLE_SEP_DELIM, "|".to_string()),
                (NEWLINE, "\n".to_string()),
            ],
        );
        assert_eq!(
            // Dash runs separated by whitespace.
            separator_tokens("--- ---\n"),
            vec![
                (TABLE_SEP_DASHES, "---".to_string()),
                (TABLE_SEP_WHITESPACE, " ".to_string()),
                (TABLE_SEP_DASHES, "---".to_string()),
                (NEWLINE, "\n".to_string()),
            ],
        );
    }

    #[test]
    fn test_trim_spaces_tabs() {
        assert_eq!(trim_start_spaces_tabs(" \tfoo"), "foo");
        assert_eq!(trim_start_spaces_tabs("foo"), "foo");
        assert_eq!(trim_start_spaces_tabs(""), "");
        assert_eq!(trim_end_spaces_tabs("foo \t"), "foo");
        assert_eq!(trim_end_spaces_tabs("foo"), "foo");
        assert_eq!(trim_end_spaces_tabs(""), "");
        // Multi-byte characters before the trailing space must survive intact.
        assert_eq!(trim_end_spaces_tabs("föö "), "föö");
    }
}