nquads_syntax/
parsing.rs

1use crate::{
2	lexing::{self, Token, Tokens},
3	Lexer,
4};
5use decoded_char::DecodedChar;
6use iref::{Iri, IriBuf};
7use locspan::{Meta, Span};
8use rdf_types::{Id, Literal, LiteralType};
9use static_iref::iri;
10use std::fmt;
11
12#[derive(Debug)]
13pub enum Error<E> {
14	Lexer(E),
15	Unexpected(Option<Token>),
16}
17
18pub type MetaError<E, Span> = Meta<Box<Error<E>>, Span>;
19
20impl<E: fmt::Display> fmt::Display for Error<E> {
21	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
22		match self {
23			Self::Unexpected(None) => write!(f, "unexpected end of file"),
24			Self::Unexpected(Some(token)) => write!(f, "unexpected {token}"),
25			Self::Lexer(e) => e.fmt(f),
26		}
27	}
28}
29
30impl<E: 'static + std::error::Error> std::error::Error for Error<E> {
31	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
32		match self {
33			Self::Lexer(e) => Some(e),
34			_ => None,
35		}
36	}
37}
38
/// Parser pulling its tokens from a lexer of type `L`.
pub struct Parser<L> {
	/// Source of the tokens consumed by the parser.
	lexer: L,
}
42
43impl<L> Parser<L> {
44	pub fn new(lexer: L) -> Self {
45		Self { lexer }
46	}
47}
48
49const XSD_STRING: &Iri = iri!("http://www.w3.org/2001/XMLSchema#string");
50
51impl<L: Tokens> Parser<L> {
52	fn next(&mut self) -> Result<Meta<Option<Token>, Span>, MetaError<L::Error, Span>> {
53		self.lexer
54			.next()
55			.map_err(|Meta(e, span)| Meta(Box::new(Error::Lexer(e)), span))
56	}
57
58	#[allow(clippy::type_complexity)]
59	fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, MetaError<L::Error, Span>> {
60		self.lexer
61			.peek()
62			.map_err(|Meta(e, span)| Meta(Box::new(Error::Lexer(e)), span))
63	}
64
65	fn begin(&mut self) -> Result<Span, MetaError<L::Error, Span>> {
66		self.lexer
67			.begin()
68			.map_err(|Meta(e, span)| Meta(Box::new(Error::Lexer(e)), span))
69	}
70
71	fn last_span(&self) -> Span {
72		self.lexer.last()
73	}
74
75	#[allow(clippy::type_complexity)]
76	fn parse_literal(
77		&mut self,
78		Meta(string, string_span): Meta<String, Span>,
79	) -> Result<Meta<Literal, Span>, MetaError<L::Error, Span>> {
80		let mut span = string_span;
81		match self.peek()? {
82			Meta(Some(Token::LangTag(_)), tag_span) => {
83				let tag = match self.next()? {
84					Meta(Some(Token::LangTag(tag)), _) => tag,
85					_ => panic!("expected lang tag"),
86				};
87
88				span.append(tag_span);
89				Ok(Meta(
90					Literal::new(string, LiteralType::LangString(tag)),
91					span,
92				))
93			}
94			Meta(Some(Token::Carets), _) => {
95				self.next()?;
96				match self.next()? {
97					Meta(Some(Token::Iri(iri)), iri_span) => {
98						span.append(iri_span);
99						Ok(Meta(Literal::new(string, LiteralType::Any(iri)), span))
100					}
101					Meta(unexpected, span) => {
102						Err(Meta(Box::new(Error::Unexpected(unexpected)), span))
103					}
104				}
105			}
106			_ => Ok(Meta(
107				Literal::new(string, LiteralType::Any(XSD_STRING.to_owned())),
108				span,
109			)),
110		}
111	}
112}
113
114pub trait Parse: Sized {
115	#[allow(clippy::type_complexity)]
116	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
117	where
118		L: Tokens;
119
120	#[inline(always)]
121	fn parse<C, E>(chars: C) -> Result<Meta<Self, Span>, MetaError<lexing::Error<E>, Span>>
122	where
123		C: Iterator<Item = Result<DecodedChar, E>>,
124	{
125		let mut parser = Parser::new(Lexer::new(chars));
126		Self::parse_with(&mut parser)
127	}
128
129	#[inline(always)]
130	fn parse_infallible<C>(chars: C) -> Result<Meta<Self, Span>, MetaError<lexing::Error, Span>>
131	where
132		C: Iterator<Item = DecodedChar>,
133	{
134		Self::parse(chars.map(Ok))
135	}
136
137	#[inline(always)]
138	fn parse_utf8<C, E>(chars: C) -> Result<Meta<Self, Span>, MetaError<lexing::Error<E>, Span>>
139	where
140		C: Iterator<Item = Result<char, E>>,
141	{
142		Self::parse(decoded_char::FallibleUtf8Decoded::new(chars))
143	}
144
145	#[inline(always)]
146	fn parse_utf8_infallible<C>(
147		chars: C,
148	) -> Result<Meta<Self, Span>, MetaError<lexing::Error, Span>>
149	where
150		C: Iterator<Item = char>,
151	{
152		Self::parse_infallible(decoded_char::Utf8Decoded::new(chars))
153	}
154
155	#[inline(always)]
156	fn parse_utf16<C, E>(chars: C) -> Result<Meta<Self, Span>, MetaError<lexing::Error<E>, Span>>
157	where
158		C: Iterator<Item = Result<char, E>>,
159	{
160		Self::parse(decoded_char::FallibleUtf16Decoded::new(chars))
161	}
162
163	#[inline(always)]
164	fn parse_utf16_infallible<C>(
165		chars: C,
166	) -> Result<Meta<Self, Span>, MetaError<lexing::Error, Span>>
167	where
168		C: Iterator<Item = char>,
169	{
170		Self::parse_infallible(decoded_char::Utf16Decoded::new(chars))
171	}
172
173	#[inline(always)]
174	fn parse_str(string: &str) -> Result<Meta<Self, Span>, MetaError<lexing::Error, Span>> {
175		Self::parse_utf8_infallible(string.chars())
176	}
177}
178
179impl Parse for IriBuf {
180	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
181	where
182		L: Tokens,
183	{
184		match parser.next()? {
185			Meta(Some(Token::Iri(iri)), span) => Ok(Meta(iri, span)),
186			Meta(unexpected, span) => Err(Meta(Box::new(Error::Unexpected(unexpected)), span)),
187		}
188	}
189}
190
191impl Parse for crate::Subject {
192	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
193	where
194		L: Tokens,
195	{
196		match parser.next()? {
197			Meta(Some(Token::Iri(iri)), span) => Ok(Meta(Self::Iri(iri), span)),
198			Meta(Some(Token::BlankNodeLabel(label)), span) => Ok(Meta(Self::Blank(label), span)),
199			Meta(unexpected, span) => Err(Meta(Box::new(Error::Unexpected(unexpected)), span)),
200		}
201	}
202}
203
204impl Parse for Literal {
205	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
206	where
207		L: Tokens,
208	{
209		match parser.next()? {
210			Meta(Some(Token::StringLiteral(string)), span) => {
211				parser.parse_literal(Meta(string, span))
212			}
213			Meta(unexpected, span) => Err(Meta(Box::new(Error::Unexpected(unexpected)), span)),
214		}
215	}
216}
217
218impl Parse for crate::Object {
219	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
220	where
221		L: Tokens,
222	{
223		match parser.next()? {
224			Meta(Some(Token::Iri(iri)), span) => Ok(Meta(Self::Id(Id::Iri(iri)), span)),
225			Meta(Some(Token::BlankNodeLabel(label)), span) => {
226				Ok(Meta(Self::Id(Id::Blank(label)), span))
227			}
228			Meta(Some(Token::StringLiteral(string)), string_span) => {
229				let Meta(lit, loc) = parser.parse_literal(Meta(string, string_span))?;
230				Ok(Meta(Self::Literal(lit), loc))
231			}
232			Meta(unexpected, span) => Err(Meta(Box::new(Error::Unexpected(unexpected)), span)),
233		}
234	}
235}
236
237impl Parse for crate::Quad {
238	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
239	where
240		L: Tokens,
241	{
242		let mut span = parser.begin()?;
243		let subject = crate::Subject::parse_with(parser)?;
244		let predicate = IriBuf::parse_with(parser)?;
245		let object = crate::Object::parse_with(parser)?;
246		let graph = match parser.next()? {
247			Meta(Some(Token::Dot), _) => None,
248			opt_token => {
249				let graph_label = match opt_token {
250					Meta(Some(Token::Iri(iri)), span) => Meta(crate::GraphLabel::Iri(iri), span),
251					Meta(Some(Token::BlankNodeLabel(label)), span) => {
252						Meta(crate::GraphLabel::Blank(label), span)
253					}
254					Meta(unexpected, span) => {
255						return Err(Meta(Box::new(Error::Unexpected(unexpected)), span))
256					}
257				};
258
259				match parser.next()? {
260					Meta(Some(Token::Dot), _) => Some(graph_label),
261					Meta(unexpected, span) => {
262						return Err(Meta(Box::new(Error::Unexpected(unexpected)), span))
263					}
264				}
265			}
266		};
267
268		span.append(parser.last_span());
269		Ok(Meta(
270			crate::Quad::new(subject, predicate, object, graph),
271			span,
272		))
273	}
274}
275
276impl Parse for crate::GrdfQuad {
277	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
278	where
279		L: Tokens,
280	{
281		let mut span = parser.begin()?;
282		let subject = crate::Term::parse_with(parser)?;
283		let predicate = crate::Term::parse_with(parser)?;
284		let object = crate::Term::parse_with(parser)?;
285		let graph = match parser.next()? {
286			Meta(Some(Token::Dot), _) => None,
287			opt_token => {
288				let graph_label = match opt_token {
289					Meta(Some(Token::Iri(iri)), span) => Meta(crate::Term::Id(Id::Iri(iri)), span),
290					Meta(Some(Token::BlankNodeLabel(label)), span) => {
291						Meta(crate::Term::Id(Id::Blank(label)), span)
292					}
293					Meta(Some(Token::StringLiteral(string)), string_span) => {
294						let Meta(lit, meta) = parser.parse_literal(Meta(string, string_span))?;
295						Meta(crate::Term::Literal(lit), meta)
296					}
297					Meta(unexpected, span) => {
298						return Err(Meta(Box::new(Error::Unexpected(unexpected)), span))
299					}
300				};
301
302				match parser.next()? {
303					Meta(Some(Token::Dot), _) => Some(graph_label),
304					Meta(unexpected, span) => {
305						return Err(Meta(Box::new(Error::Unexpected(unexpected)), span))
306					}
307				}
308			}
309		};
310
311		span.append(parser.last_span());
312		Ok(Meta(
313			crate::GrdfQuad::new(subject, predicate, object, graph),
314			span,
315		))
316	}
317}
318
319impl Parse for crate::Document {
320	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
321	where
322		L: Tokens,
323	{
324		let mut quads = Vec::new();
325		let mut span = parser.begin()?;
326
327		loop {
328			match parser.peek()? {
329				Meta(Some(_), _) => {
330					quads.push(crate::Quad::parse_with(parser)?);
331				}
332				Meta(None, end) => {
333					span.append(end);
334					break;
335				}
336			}
337		}
338
339		Ok(Meta(quads, span))
340	}
341}
342
343impl Parse for crate::GrdfDocument {
344	fn parse_with<L>(parser: &mut Parser<L>) -> Result<Meta<Self, Span>, MetaError<L::Error, Span>>
345	where
346		L: Tokens,
347	{
348		let mut quads = Vec::new();
349		let mut span = parser.begin()?;
350
351		loop {
352			match parser.peek()? {
353				Meta(Some(_), _) => {
354					quads.push(crate::GrdfQuad::parse_with(parser)?);
355				}
356				Meta(None, end) => {
357					span.append(end);
358					break;
359				}
360			}
361		}
362
363		Ok(Meta(quads, span))
364	}
365}