Skip to main content

jrsonnet_lexer/
string_block.rs

/// Reasons lexing a `|||` text block can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringBlockError {
	/// The input ended before the block was complete.
	UnexpectedEnd,
	/// Something other than a newline followed the opening `|||` (after optional
	/// `-` and trailing whitespace).
	MissingNewLine,
	/// The first line that is not indented like the block does not start with `|||`.
	MissingTermination,
	/// The first content line of the block does not start with whitespace.
	MissingIndent,
}
8
9use StringBlockError::*;
10use logos::Lexer;
11
12use crate::generated::syntax_kinds::SyntaxKind;
13
14pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {
15	let _ = lex_str_block(lex);
16}
17
/// Byte-offset cursor over the text being scanned by the string block lexer.
pub(crate) struct Context<'a> {
	/// Text being scanned.
	source: &'a str,
	/// Byte offset of the cursor into `source`. It must always sit on a `char`
	/// boundary, otherwise slicing in [`Context::rest`] panics.
	index: usize,
}

impl<'a> Context<'a> {
	/// Returns the part of the source that has not been consumed yet.
	fn rest(&self) -> &'a str {
		&self.source[self.index..]
	}

	/// Consumes and returns the next character, or `None` at end of input.
	fn next(&mut self) -> Option<char> {
		let c = self.peek()?;
		self.index += c.len_utf8();
		Some(c)
	}

	/// Returns the next character without consuming it, or `None` at end of input.
	fn peek(&self) -> Option<char> {
		// An empty remainder yields `None` on its own, no explicit end check needed.
		self.rest().chars().next()
	}

	/// Consumes the next character if `f` accepts it.
	///
	/// Returns the number of bytes consumed: the UTF-8 width of the character,
	/// or 0 when nothing matched (consistent with [`Context::eat_while`]).
	fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
		match self.peek() {
			Some(c) if f(c) => {
				// Advance by the full encoded width; stepping a fixed single byte
				// would leave the cursor inside a multi-byte character.
				let width = c.len_utf8();
				self.index += width;
				width
			}
			_ => 0,
		}
	}

	/// Consumes characters for as long as `f` accepts them.
	///
	/// Returns the number of bytes consumed.
	fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
		let rest = self.rest();
		// Byte offset of the first rejected character, or the whole remainder
		// when every character is accepted.
		let consumed = rest.find(|c: char| !f(c)).unwrap_or(rest.len());
		self.index += consumed;
		consumed
	}

	/// Advances the cursor by `len` bytes, clamping at the end of the source.
	///
	/// `len` is a byte count; the caller is responsible for it ending on a
	/// `char` boundary.
	fn skip(&mut self, len: usize) {
		self.index = self.index.saturating_add(len).min(self.source.len());
	}
}
85
// Measures the leading run of spaces/tabs in `a` and checks that `b` begins
// with exactly the same bytes. Returns the length of that run on success and
// 0 on mismatch. If `a` has no whitespace prefix, then the result is 0 as well.
fn check_whitespace(a: &str, b: &str) -> usize {
	let prefix_len = a
		.bytes()
		.take_while(|&byte| byte == b' ' || byte == b'\t')
		.count();
	let prefix = &a.as_bytes()[..prefix_len];

	// `starts_with` covers both failure modes: `b` being shorter than the
	// prefix and `b` differing from it at some position.
	if b.as_bytes().starts_with(prefix) {
		prefix_len
	} else {
		0
	}
}
113
114pub(crate) trait StrBlockLexCtx<'d> {
115	fn remainder(&self) -> &'d str;
116	fn eat_error(&mut self, ctx: &Context<'d>);
117	fn bump_pos(&mut self, s: usize);
118	fn mark_truncating(&mut self);
119	fn mark_line(&mut self, line: &'d str);
120}
121
122impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
123	fn remainder(&self) -> &'d str {
124		self.remainder()
125	}
126	fn eat_error(&mut self, ctx: &Context<'d>) {
127		let end_index = ctx
128			.rest()
129			.find("|||")
130			.map_or_else(|| ctx.rest().len(), |v| v + 3);
131		self.bump(ctx.index + end_index);
132	}
133	fn bump_pos(&mut self, s: usize) {
134		self.bump(s);
135	}
136	fn mark_truncating(&mut self) {
137		// Lexer test doesn't collect anything
138	}
139	fn mark_line(&mut self, _line: &'d str) {
140		// Lexer test doesn't collect anything
141	}
142}
143
144pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {
145	let mut collect = CollectStrBlock {
146		truncate: false,
147		lines: vec![],
148		input,
149		offset: 0,
150	};
151	lex_str_block(&mut collect)?;
152	Ok(collect)
153}
154
/// Result of lexing a text block with line collection enabled.
pub struct CollectStrBlock<'s> {
	/// `true` when the opening `|||` was immediately followed by `-`.
	pub truncate: bool,
	/// Content lines in source order, without the block indent and without the
	/// trailing `\n`; blank lines are stored as `""`.
	pub lines: Vec<&'s str>,

	/// Text handed to the lexer (everything after the opening `|||`).
	input: &'s str,
	/// Sum of all byte counts reported through `bump_pos`.
	offset: usize,
}
161
162impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
163	fn remainder(&self) -> &'d str {
164		self.input
165	}
166
167	fn eat_error(&mut self, _ctx: &Context<'d>) {
168		// Error will be returned, no need to record it here
169	}
170
171	fn bump_pos(&mut self, s: usize) {
172		self.offset += s;
173	}
174
175	fn mark_truncating(&mut self) {
176		self.truncate = true;
177	}
178
179	fn mark_line(&mut self, line: &'d str) {
180		self.lines.push(line);
181	}
182}
183
184pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
185	// debug_assert_eq!(lex.slice(), "|||");
186	let mut ctx = Context::<'a> {
187		source: lex.remainder(),
188		index: 0,
189	};
190
191	if ctx.eat_if(|v| v == '-') != 0 {
192		lex.mark_truncating();
193	}
194
195	// Skip whitespaces
196	ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');
197
198	// Skip \n
199	match ctx.next() {
200		Some('\n') => (),
201		None => {
202			lex.eat_error(&ctx);
203			return Err(UnexpectedEnd);
204		}
205		// Text block requires new line after |||.
206		Some(_) => {
207			lex.eat_error(&ctx);
208			return Err(MissingNewLine);
209		}
210	}
211
212	// Process leading blank lines before calculating string block indent
213	while ctx.peek() == Some('\n') {
214		lex.mark_line("");
215		ctx.next();
216	}
217
218	let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());
219	let str_block_indent = &ctx.rest()[..num_whitespace];
220
221	if num_whitespace == 0 {
222		// Text block's first line must start with whitespace
223		lex.eat_error(&ctx);
224		return Err(MissingIndent);
225	}
226
227	loop {
228		debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");
229		ctx.skip(num_whitespace);
230
231		let rest = ctx.rest();
232		match rest.find('\n') {
233			None => {
234				ctx.index = ctx.source.len();
235				lex.eat_error(&ctx);
236				return Err(UnexpectedEnd);
237			}
238			Some(nl_pos) => {
239				lex.mark_line(&rest[..nl_pos]);
240				ctx.index += nl_pos + 1;
241			}
242		}
243
244		// Skip any blank lines
245		while ctx.peek() == Some('\n') {
246			lex.mark_line("");
247			ctx.next();
248		}
249
250		// Look at the next line
251		num_whitespace = check_whitespace(str_block_indent, ctx.rest());
252		if num_whitespace == 0 {
253			// End of the text block
254			// let mut term_indent = String::with_capacity(num_whitespace);
255			while let Some(' ' | '\t') = ctx.peek() {
256				// term_indent.push(
257				ctx.next().unwrap();
258				// );
259			}
260
261			if !ctx.rest().starts_with("|||") {
262				if ctx.rest().is_empty() {
263					lex.bump_pos(ctx.index);
264					return Err(UnexpectedEnd);
265				}
266				lex.eat_error(&ctx);
267				return Err(MissingTermination);
268			}
269
270			// Skip '|||'
271			ctx.skip(3);
272			break;
273		}
274	}
275
276	lex.bump_pos(ctx.index);
277	Ok(())
278}