// jrsonnet_lexer/string_block.rs

/// Reasons why scanning a `|||` string block can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringBlockError {
    /// The input ran out before the block was complete: before the newline
    /// that must follow the opening, in the middle of a content line, or
    /// where the closing `|||` was expected.
    UnexpectedEnd,
    /// Something other than a newline followed the opening (after the
    /// optional `-` and any spaces, tabs or carriage returns).
    MissingNewLine,
    /// A line that does not carry the block indent was found, but it is not
    /// the closing `|||`.
    MissingTermination,
    /// The first non-empty line of the block does not start with a space or
    /// a tab.
    MissingIndent,
}
8
9use StringBlockError::*;
10use logos::Lexer;
11
12use crate::generated::syntax_kinds::SyntaxKind;
13
14pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {
15 let _ = lex_str_block(lex);
16}
17
/// Byte cursor over the text scanned by `lex_str_block`.
pub(crate) struct Context<'a> {
    /// The complete text being scanned.
    source: &'a str,
    /// Byte offset of the next unread character. Must stay on a `char`
    /// boundary and never exceed `source.len()`, otherwise `rest` panics.
    index: usize,
}

impl<'a> Context<'a> {
    /// Returns the part of `source` that has not been consumed yet.
    fn rest(&self) -> &'a str {
        &self.source[self.index..]
    }

    /// Consumes and returns the next character, or `None` at end of input.
    fn next(&mut self) -> Option<char> {
        let c = self.peek()?;
        self.index += c.len_utf8();
        Some(c)
    }

    /// Returns the next character without consuming it, or `None` at end of
    /// input.
    fn peek(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Consumes the next character if `f` accepts it.
    ///
    /// Returns the number of bytes consumed: `0` when nothing matched (or at
    /// end of input), otherwise the UTF-8 width of the character. For ASCII
    /// this is `1`.
    fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
        match self.peek() {
            Some(c) if f(c) => {
                // Advance by the encoded width, not by one byte; a one-byte
                // step would leave the cursor inside a multi-byte character
                // and make the next `rest()` slice panic.
                let width = c.len_utf8();
                self.index += width;
                width
            }
            _ => 0,
        }
    }

    /// Consumes characters for as long as `f` accepts them.
    ///
    /// Returns the number of bytes consumed.
    fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
        let rest = self.rest();
        // Byte offset of the first rejected character; everything when none
        // is rejected.
        let eaten = rest.find(|c: char| !f(c)).unwrap_or(rest.len());
        self.index += eaten;
        eaten
    }

    /// Advances the cursor by `len` bytes, stopping at the end of input.
    ///
    /// `len` is a byte count; the caller is responsible for landing on a
    /// `char` boundary.
    fn skip(&mut self, len: usize) {
        // Saturate so an oversized `len` clamps instead of overflowing.
        self.index = self.index.saturating_add(len).min(self.source.len());
    }
}
85
/// Measures the leading run of spaces/tabs in `a` and checks that `b` starts
/// with exactly the same bytes.
///
/// Returns the length in bytes of that run when `b` begins with it, and `0`
/// otherwise (including when `a` has no leading space or tab at all, or when
/// `b` is shorter than the run).
///
/// Calling it with the same string for both arguments therefore yields the
/// length of that string's leading whitespace.
fn check_whitespace(a: &str, b: &str) -> usize {
    let indent_len = a
        .bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count();
    let indent = &a.as_bytes()[..indent_len];

    if b.as_bytes().starts_with(indent) {
        indent_len
    } else {
        0
    }
}
113
114pub(crate) trait StrBlockLexCtx<'d> {
115 fn remainder(&self) -> &'d str;
116 fn eat_error(&mut self, ctx: &Context<'d>);
117 fn bump_pos(&mut self, s: usize);
118 fn mark_truncating(&mut self);
119 fn mark_line(&mut self, line: &'d str);
120}
121
122impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
123 fn remainder(&self) -> &'d str {
124 self.remainder()
125 }
126 fn eat_error(&mut self, ctx: &Context<'d>) {
127 let end_index = ctx
128 .rest()
129 .find("|||")
130 .map_or_else(|| ctx.rest().len(), |v| v + 3);
131 self.bump(ctx.index + end_index);
132 }
133 fn bump_pos(&mut self, s: usize) {
134 self.bump(s);
135 }
136 fn mark_truncating(&mut self) {
137 }
139 fn mark_line(&mut self, _line: &'d str) {
140 }
142}
143
144pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {
145 let mut collect = CollectStrBlock {
146 truncate: false,
147 lines: vec![],
148 input,
149 offset: 0,
150 };
151 lex_str_block(&mut collect)?;
152 Ok(collect)
153}
154
/// Result of scanning a string block with `collect_lexed_str_block`.
#[derive(Debug)]
pub struct CollectStrBlock<'s> {
    /// Set when the scanned text started with `-`.
    pub truncate: bool,
    /// Lines of the block in source order, borrowed from the input. Content
    /// lines have the block indent and trailing `\n` removed; empty lines are
    /// stored as `""`.
    pub lines: Vec<&'s str>,
    /// Text that was handed to the scanner.
    input: &'s str,
    /// Number of bytes of `input` the scanner reported as consumed.
    offset: usize,
}
161
162impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
163 fn remainder(&self) -> &'d str {
164 self.input
165 }
166
167 fn eat_error(&mut self, _ctx: &Context<'d>) {
168 }
170
171 fn bump_pos(&mut self, s: usize) {
172 self.offset += s;
173 }
174
175 fn mark_truncating(&mut self) {
176 self.truncate = true;
177 }
178
179 fn mark_line(&mut self, line: &'d str) {
180 self.lines.push(line);
181 }
182}
183
184pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
185 let mut ctx = Context::<'a> {
187 source: lex.remainder(),
188 index: 0,
189 };
190
191 if ctx.eat_if(|v| v == '-') != 0 {
192 lex.mark_truncating();
193 }
194
195 ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');
197
198 match ctx.next() {
200 Some('\n') => (),
201 None => {
202 lex.eat_error(&ctx);
203 return Err(UnexpectedEnd);
204 }
205 Some(_) => {
207 lex.eat_error(&ctx);
208 return Err(MissingNewLine);
209 }
210 }
211
212 while ctx.peek() == Some('\n') {
214 lex.mark_line("");
215 ctx.next();
216 }
217
218 let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());
219 let str_block_indent = &ctx.rest()[..num_whitespace];
220
221 if num_whitespace == 0 {
222 lex.eat_error(&ctx);
224 return Err(MissingIndent);
225 }
226
227 loop {
228 debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");
229 ctx.skip(num_whitespace);
230
231 let rest = ctx.rest();
232 match rest.find('\n') {
233 None => {
234 ctx.index = ctx.source.len();
235 lex.eat_error(&ctx);
236 return Err(UnexpectedEnd);
237 }
238 Some(nl_pos) => {
239 lex.mark_line(&rest[..nl_pos]);
240 ctx.index += nl_pos + 1;
241 }
242 }
243
244 while ctx.peek() == Some('\n') {
246 lex.mark_line("");
247 ctx.next();
248 }
249
250 num_whitespace = check_whitespace(str_block_indent, ctx.rest());
252 if num_whitespace == 0 {
253 while let Some(' ' | '\t') = ctx.peek() {
256 ctx.next().unwrap();
258 }
260
261 if !ctx.rest().starts_with("|||") {
262 if ctx.rest().is_empty() {
263 lex.bump_pos(ctx.index);
264 return Err(UnexpectedEnd);
265 }
266 lex.eat_error(&ctx);
267 return Err(MissingTermination);
268 }
269
270 ctx.skip(3);
272 break;
273 }
274 }
275
276 lex.bump_pos(ctx.index);
277 Ok(())
278}