Skip to main content

just_kdl/
reader.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! Read document events out of a file.
3//!
4//! You probably want to start at [`Reader`].
5
6// TODO: consider re-merging lexer (or at least using hinted lexing?)
7
8use alloc::vec::Vec;
9use core::ops::Range;
10
11use smol_str::SmolStr;
12use thiserror::Error;
13
14use crate::dom::{Entry, Event, Value};
15use crate::lexer::{Input, Lexer, LexerError, Token};
16
/// An error while reading
///
/// Apart from [`ReaderError::Lexer`], every variant is raised by the reader
/// itself when the token stream does not fit the expected structure. Variants
/// that carry a `Token` hold the token that was found instead of the expected
/// one; it is interpolated into the message as `{0}`.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ReaderError {
	#[error(transparent)]
	/// Inner lexer error, includes IO errors
	Lexer(LexerError),
	// raised where a node name, type name or property key is required
	#[error("Expected string, got {0}")]
	#[doc = "Expected string, got {0}"]
	ExpectedString(Token),
	// raised where an argument or property value is required
	#[error("Expected value, got {0}")]
	#[doc = "Expected value, got {0}"]
	ExpectedValue(Token),
	// raised when a `(type` annotation is not closed
	#[error("Expected ')', got {0}")]
	#[doc = "Expected ')', got {0}"]
	ExpectedCloseParen(Token),
	// end of file reached while at least one children block is still open
	#[error("Unclosed '{{' before end of file")]
	#[doc = "Unclosed '{' before end of file"]
	UnclosedOpen,
	// `}` found while no children block is open
	#[error("Unmatched '}}'")]
	#[doc = "Unmatched '}'"]
	UnmatchedClose,
	// an entry that is not slash-dashed must be preceded by spaces
	#[error("Expected '/-' or space before entry")]
	#[doc = "Expected '/-' or space before entry"]
	ExpectedEntrySpace,
	// token cannot start an entry, a children block or a node terminator
	#[error("Expected entry, block, or end of node")]
	#[doc = "Expected entry, block, or end of node"]
	ExpectedEntry,
	// after a children block only further blocks or a terminator may follow
	#[error("Expected block or end of node")]
	#[doc = "Expected block or end of node"]
	ExpectedBlock,
}
/// Value (event, error) with a span attached
///
/// The span is a half-open range between two lexer positions.
// NOTE(review): positions are presumably byte offsets into the input; confirm
// against the lexer's `current_position`.
pub type Spanned<T> = (T, Range<usize>);
// internal result with error spans: every error is paired with the source
// range it refers to
type ReaderResult<T> = Result<T, Spanned<ReaderError>>;
53
/// Event that might be commented
///
/// This is the raw output of the reader's state machine. Skipped and
/// slash-dashed ("sd") items are still reported, with their payload replaced
/// by `None` or flagged with `true`, so that the public event stream can
/// filter them out and keep nesting balanced.
#[derive(PartialEq)]
enum InnerEvent {
	/// Start of a node as (type, name);
	/// none = skip or sd
	Node(Option<(Option<SmolStr>, SmolStr)>),
	/// none = skip or sd, type, key, value
	///
	/// `None` is also produced as a filler when a node terminator is read
	/// after the node's end has already been reported.
	Entry(Option<(Option<SmolStr>, Option<SmolStr>, Value)>),
	/// Opening `{` of a children block;
	/// true = sd
	Children(bool),
	/// End of a node, or the closing `}` of a children block;
	/// true = sd children block
	End(bool),
	/// End of the document
	Done,
}
67
/// Position of the reader inside the document grammar.
enum State {
	/// Initial state; handled exactly like `NextNode`
	Begin,
	/// Expecting the start of a node, a closing `}` or the end of the file
	NextNode,
	/// Inside a node, reading entries or the children block;
	/// true = pre-spaced (the spaces before the next entry were already
	/// consumed while looking ahead)
	NodeEntries(bool),
	/// Just after the closing `}` of a children block;
	/// true = real part of node (a real children block and the node's end
	/// event are still to come)
	NodeChildren(bool),
	/// End of the document was reached, or an error stopped the reader
	Done,
}
77
/// Reader of document events.
///
/// Pulls tokens from a lexer and turns them into a flat stream of events,
/// available through the [`Iterator`] implementation. Once an error has been
/// returned the reader yields nothing further.
pub struct Reader<T> {
	// TODO: generic token source, if there's a use case for that
	/// source of tokens
	lexer: Lexer<T>,
	// TODO/perf: remove implicit peek/advance pairs in mid-parsing code, looks awful
	/// one-token lookahead buffer: filled by `peek`, emptied by `advance`
	peek_token: Option<Spanned<Token>>,
	/// where in the grammar the next event starts
	state: State,
	/// current children block depth, true = sd
	///
	/// One element per currently open `{`.
	// TODO/perf: replace with a bitwise vector of some kind
	brackets: Vec<bool>,
}
89
90impl<T: Input> Reader<T> {
91	/// Create a new reader from an input.
92	pub fn new(input: T) -> Self { Self::from_lexer(Lexer::new(input)) }
93	/// Create a new reader directly from the token source.
94	pub fn from_lexer(lexer: Lexer<T>) -> Self {
95		Self {
96			lexer,
97			peek_token: None,
98			state: State::Begin,
99			brackets: Vec::new(),
100		}
101	}
	/// Look at the next token without consuming it.
	///
	/// A token is requested from the lexer only when the lookahead buffer is
	/// empty; `skip` is passed along to the lexer in that case and ignored
	/// otherwise. The token stays buffered until `advance` takes it.
	///
	/// # Errors
	/// A lexer failure is returned as `ReaderError::Lexer` with the span of the
	/// failed token. A `Token::Eof` placeholder is left in the buffer.
	fn peek(&mut self, skip: bool) -> ReaderResult<&Spanned<Token>> {
		// some weird lifetimes here, but it works
		// the closure cannot return an error itself, so it is smuggled out
		// through this local
		let mut res = Ok(());
		let token = self.peek_token.get_or_insert_with(|| {
			let (token, pos) = self.lexer.next_token(skip);
			// this returns a single character span for Spaces / Lines,
			// that's OK as those spans are only used in:
			// - Event::End - where we only want one character of span anyways
			// - Errors, where preserving the end of span isn't too important (no recovery)
			let span = pos..self.lexer.current_position();
			match token {
				Ok(token) => (token, span),
				Err(err) => {
					res = Err((ReaderError::Lexer(err), span.clone()));
					// the buffer slot has to hold something
					(Token::Eof, span)
				}
			}
		});
		res.map(|()| &*token)
	}
122	#[track_caller]
123	fn advance(&mut self) -> Spanned<Token> { self.peek_token.take().unwrap() }
124	fn skip_spaces(&mut self, skip: bool) -> ReaderResult<bool> {
125		if self.peek(skip)?.0 == Token::Spaces {
126			self.advance();
127			Ok(true)
128		} else {
129			Ok(false)
130		}
131	}
132	fn skip_lines(&mut self, skip: bool) -> ReaderResult<()> {
133		match self.peek(skip)?.0 {
134			Token::Spaces => {
135				self.advance();
136				if self.peek(skip)?.0 == Token::Lines {
137					self.advance();
138				}
139			}
140			Token::Lines => {
141				self.advance();
142			}
143			_ => {}
144		}
145		Ok(())
146	}
147	fn string(token: Spanned<Token>) -> ReaderResult<Option<SmolStr>> {
148		Ok(match token.0 {
149			Token::String(text) => Some(text),
150			Token::SkippedString => None,
151			_ => return Err((ReaderError::ExpectedString(token.0), token.1)),
152		})
153	}
154	fn value(skip: bool, token: Spanned<Token>) -> ReaderResult<Option<Value>> {
155		Ok(match token.0 {
156			Token::SkippedString | Token::SkippedNumber => None,
157			// do base64/base85 decoding
158			Token::String(text) => Some(Value::String(text)),
159			Token::Number(number) => Some(Value::Number(number)),
160			Token::Bool(value) => (!skip).then_some(Value::Bool(value)),
161			Token::Null => (!skip).then_some(Value::Null),
162			_ => return Err((ReaderError::ExpectedValue(token.0), token.1)),
163		})
164	}
165	/// String ) Spaces?
166	fn type_body(&mut self, skip: bool) -> ReaderResult<Option<SmolStr>> {
167		self.skip_spaces(skip)?;
168		self.peek(skip)?;
169		let text = Self::string(self.advance())?;
170		self.skip_spaces(skip)?;
171		self.peek(skip)?;
172		let close = self.advance();
173		if close.0 != Token::CloseParen {
174			return Err((ReaderError::ExpectedCloseParen(close.0), close.1));
175		}
176		self.skip_spaces(skip)?;
177		Ok(text)
178	}
179	/// None = no type or skipped
180	fn maybe_type(&mut self, skip: bool) -> ReaderResult<Option<SmolStr>> {
181		Ok(if self.peek(skip)?.0 == Token::OpenParen {
182			self.advance();
183			self.type_body(skip)?
184		} else {
185			None
186		})
187	}
188	/// (sd, skip)
189	fn maybe_slash_dash(&mut self, skip: bool) -> ReaderResult<(bool, bool)> {
190		if self.peek(skip)?.0 == Token::SlashDash {
191			self.advance();
192			self.skip_lines(true)?;
193			Ok((true, true))
194		} else {
195			Ok((false, skip))
196		}
197	}
	/// Run the state machine for one step and return the next raw event.
	///
	/// `skip` is passed on to the lexer for every token read here; a `/-`
	/// marker switches it on for the element it comments out. Skipped and
	/// slash-dashed nodes and entries are reported with a `None` payload,
	/// slash-dashed children blocks as `Children(true)`.
	///
	/// # Errors
	/// Lexer errors and any structural error, each with the span it refers to.
	#[expect(clippy::too_many_lines, reason = "too lazy to fix this")]
	fn next_inner_event(&mut self, skip: bool) -> ReaderResult<Spanned<InnerEvent>> {
		match self.state {
			// between nodes: end of file, end of block, or a new node
			State::Begin | State::NextNode => {
				// drop a byte-order-mark token if the lexer reports one
				if self.peek(skip)?.0 == Token::Bom {
					self.advance();
				}
				self.skip_lines(skip)?;
				let case_token = self.peek(skip)?;
				let span = case_token.1.clone();
				match case_token.0 {
					Token::Eof => {
						// every `{` must have been closed by now
						if !self.brackets.is_empty() {
							return Err((ReaderError::UnclosedOpen, span));
						}
						self.state = State::Done;
						return Ok((InnerEvent::Done, span));
					}
					Token::CloseCurly => {
						let span = span.clone();
						// `pop` is the flag stored when this block was opened
						let Some(pop) = self.brackets.pop() else {
							return Err((ReaderError::UnmatchedClose, span));
						};
						self.advance();
						self.state = State::NodeChildren(pop);
						return Ok((InnerEvent::End(pop), span));
					}
					_ => {}
				}
				// node header: optional `/-`, optional `(type)`, then the name
				let (_, skip) = self.maybe_slash_dash(skip)?;
				let r#type = self.maybe_type(skip)?;
				self.peek(skip)?;
				let name_token = self.advance();
				// from the first token of the header to the end of the name
				let span = span.start..name_token.1.end;
				let name = Self::string(name_token)?;
				self.state = State::NodeEntries(false);
				Ok((InnerEvent::Node(name.map(|name| (r#type, name))), span))
			}
			// inside a node: entries, children blocks, or the terminator
			State::NodeEntries(_) | State::NodeChildren(_) => {
				// spaces: spaces before this entry were already consumed
				// entries: entries are still allowed at this point
				// real_body: the node's end has not been reported yet
				let (spaces, entries, real_body) = match self.state {
					State::NodeEntries(spaces) => {
						// reset space state now for following events
						self.state = State::NodeEntries(false);
						(spaces, true, true)
					}
					State::NodeChildren(real_body) => (false, false, real_body),
					_ => unreachable!(),
				};
				let spaces = spaces || self.skip_spaces(skip)?;
				let case_token = self.peek(skip)?;
				let case_span = case_token.1.clone();
				let start = case_token.1.start;
				// node terminators; Eof and `}` are left for the NextNode state
				if matches!(
					case_token.0,
					Token::Eof | Token::CloseCurly | Token::SemiColon | Token::Lines
				) {
					let span = if matches!(case_token.0, Token::SemiColon | Token::Lines) {
						self.advance();
						case_span
					} else {
						// keep spans incrementing
						case_span.start..case_span.start
					};
					self.state = State::NextNode;
					return if real_body {
						Ok((InnerEvent::End(false), span))
					} else {
						// the node's end was already reported when its
						// children block closed, so emit a filler instead
						// this event doesn't require any span, but i have one anyways
						Ok((InnerEvent::Entry(None), span))
					};
				}
				// sd: this element is slash-dashed; skip_or_sd: lex it as skipped
				let (sd, skip_or_sd) = self.maybe_slash_dash(skip)?;
				let mv_token = self.peek(skip_or_sd)?;
				let mv_span = mv_token.1.clone();
				match mv_token.0 {
					// a real block needs the real part of the node, a
					// slash-dashed block is accepted in either part
					Token::OpenCurly if real_body || sd => {
						self.advance();
						// NOTE(review): a slash-dashed block opened after the
						// real children block is stored as `false`, so its `}`
						// is reported as End(false) rather than End(true);
						// check code that pairs Children(true) with End(true)
						self.brackets.push(sd && real_body);
						self.state = State::NextNode;
						Ok((InnerEvent::Children(sd), start..mv_span.end))
					}
					// after a children block nothing but blocks may follow
					_ if !entries => Err((ReaderError::ExpectedBlock, start..mv_span.end)),
					// entries must be separated by spaces unless slash-dashed
					_ if !sd && !spaces => {
						Err((ReaderError::ExpectedEntrySpace, start..mv_span.end))
					}
					// value or key, unsure
					Token::String(_) | Token::SkippedString => {
						let first = self.advance();
						// NOTE: if skipping, the next (property) might be peeked. if outer-reader
						// skip ever stops after a Entry, change this to a proper difference!
						let next_spaces = self.skip_spaces(skip)?;
						if self.peek(skip)?.0 == Token::Equals {
							// property: key = (type)? value
							let name = Self::string(first)?;
							self.advance();
							self.skip_spaces(skip_or_sd)?;
							let r#type = self.maybe_type(skip_or_sd)?;
							self.peek(skip_or_sd)?;
							let token = self.advance();
							let span = start..token.1.end;
							let value = Self::value(skip_or_sd, token)?;
							Ok((
								InnerEvent::Entry(
									// a skipped key or value drops the whole entry
									name.zip(value)
										.map(|(key, value)| (r#type, Some(key), value)),
								),
								span,
							))
						} else {
							// plain string argument
							// consumed spaces that might be needed for the next entry
							self.state = State::NodeEntries(next_spaces);
							let span = start..first.1.end;
							let value = Self::value(skip_or_sd, first)?;
							Ok((
								InnerEvent::Entry(value.map(|value| (None, None, value))),
								span,
							))
						}
					}
					// untyped argument that cannot be a property key
					Token::Number(_) | Token::SkippedNumber | Token::Bool(_) | Token::Null => {
						let token = self.advance();
						let span = start..token.1.end;
						let value = Self::value(skip_or_sd, token)?;
						Ok((
							InnerEvent::Entry(value.map(|value| (None, None, value))),
							span,
						))
					}
					// typed argument: (type) value
					Token::OpenParen => {
						self.advance();
						let r#type = self.type_body(skip_or_sd)?;
						self.peek(skip_or_sd)?;
						let token = self.advance();
						let span = start..token.1.end;
						let value = Self::value(skip_or_sd, token)?;
						Ok((
							InnerEvent::Entry(value.map(|value| (r#type, None, value))),
							span,
						))
					}
					_ => Err((ReaderError::ExpectedEntry, start..mv_span.end)),
				}
			}
			// finished: keep answering Done with an empty span at the current position
			State::Done => {
				let pos = self.lexer.current_position();
				Ok((InnerEvent::Done, pos..pos))
			}
		}
	}
346	fn skip_bracketed(&mut self, open: &InnerEvent, close: &InnerEvent) -> ReaderResult<()> {
347		let mut counter = 0_usize;
348		loop {
349			let event = self.next_inner_event(true)?.0;
350			if event == InnerEvent::Done {
351				// inner reader handles bracket mismatches, and this is trivially trigger-able
352				break;
353			}
354			if &event == open {
355				counter += 1;
356			} else if &event == close {
357				if let Some(next) = counter.checked_sub(1) {
358					counter = next;
359				} else {
360					break;
361				}
362			}
363		}
364		Ok(())
365	}
366	fn next_event(&mut self) -> ReaderResult<Option<Spanned<Event>>> {
367		Ok(Some(loop {
368			let (event, span) = self.next_inner_event(false)?;
369			break (
370				match event {
371					InnerEvent::Node(Some((r#type, name))) => Event::Node { r#type, name },
372					InnerEvent::Node(None) => {
373						self.skip_to_end()?;
374						continue;
375					}
376					InnerEvent::Entry(Some((r#type, key, value))) => Event::Entry(Entry {
377						r#type,
378						name: key,
379						value,
380					}),
381					InnerEvent::Entry(None) => continue,
382					InnerEvent::Children(true) => {
383						self.skip_bracketed(&InnerEvent::Children(true), &InnerEvent::End(true))?;
384						continue;
385					}
386					InnerEvent::Children(false) => Event::Children,
387					InnerEvent::End(true) => unreachable!("bad stream"),
388					InnerEvent::End(false) => Event::End,
389					InnerEvent::Done => return Ok(None),
390				},
391				span,
392			);
393		}))
394	}
395	/// Skip to the next node. When currently reading a node (i.e. after a
396	/// `Node` event) this ends the node, otherwise it goes to the parent block
397	///
398	/// # Errors
399	/// On any syntax errors.
400	pub fn skip_to_end(&mut self) -> ReaderResult<()> {
401		self.skip_bracketed(&InnerEvent::Node(None), &InnerEvent::End(false))
402	}
403}
404
405/// Read one event at a time.
406impl<T: Input> Iterator for Reader<T> {
407	type Item = ReaderResult<Spanned<Event>>;
408	fn next(&mut self) -> Option<Self::Item> {
409		self.next_event()
410			// stop reader after errors
411			.inspect_err(|_| self.state = State::Done)
412			.transpose()
413	}
414}