turtle_syntax_next/
lexing.rs

1use crate::{BlankIdBuf, DecimalBuf, DoubleBuf, IntegerBuf, NumericLiteral};
2use decoded_char::DecodedChar;
3use iref::IriRefBuf;
4use langtag::LangTagBuf;
5use locspan::Span;
6use std::fmt;
7use std::iter::Peekable;
8use std::str::FromStr;
9
/// Fallible tokens iterator with lookahead.
///
/// Every method that can fail reports the failure as an `(error, span)` pair.
pub trait Tokens {
	/// Error produced when the next token cannot be read.
	type Error;

	/// Returns a reference to the next token, together with a span, without
	/// removing the token from the stream.
	///
	/// The token is `None` when no token is left.
	#[allow(clippy::type_complexity)]
	fn peek(&mut self) -> Result<(Option<&Token>, Span), (Self::Error, Span)>;

	/// Removes and returns the next token, together with a span.
	///
	/// The token is `None` when no token is left.
	#[allow(clippy::type_complexity)]
	fn next(&mut self) -> Result<(Option<Token>, Span), (Self::Error, Span)>;

	/// Returns the span of the last parsed token.
	fn last(&self) -> Span;
}
23
/// Unexpected char or end of file.
#[derive(Debug, thiserror::Error)]
pub enum Unexpected {
	/// A character that is not allowed at this point of the input.
	#[error("unexpected character `{0}`")]
	Char(char),

	/// The input ended while more characters were required.
	#[error("unexpected end of file")]
	EndOfFile,
}
33
34impl From<Option<char>> for Unexpected {
35	fn from(value: Option<char>) -> Self {
36		match value {
37			Some(c) => Self::Char(c),
38			None => Self::EndOfFile,
39		}
40	}
41}
42
/// Lexing error.
///
/// `E` is the error type of the underlying character stream.
#[derive(Debug, thiserror::Error)]
pub enum Error<E = std::convert::Infallible> {
	/// The characters following `@` do not form a valid language tag.
	#[error("invalid language tag")]
	InvalidLangTag,

	/// A numeric escape sequence denotes a value that is not a valid `char`.
	#[error("invalid character code point {0:x}")]
	InvalidCodepoint(u32),

	/// The text between `<` and `>` is not a valid IRI reference.
	#[error("invalid IRI reference <{0}>")]
	InvalidIriRef(iref::iri::InvalidIriRef<String>),

	/// A character (or the end of the input) was found where it is not
	/// allowed.
	#[error(transparent)]
	Unexpected(Unexpected),

	/// Error yielded by the underlying character stream.
	#[error(transparent)]
	Stream(E),
}
61
/// Token.
#[derive(Debug)]
pub enum Token {
	/// Keyword such as `a`, `@prefix` or `true`.
	Keyword(Keyword),
	/// Opening delimiter (`(` or `[`).
	Begin(Delimiter),
	/// Closing delimiter (`)` or `]`).
	End(Delimiter),
	/// Language tag (the text following `@`).
	LangTag(LangTagBuf),
	/// IRI reference (the text between `<` and `>`).
	IriRef(IriRefBuf),
	/// String literal content, with escape sequences already decoded.
	StringLiteral(String),
	/// Blank node label, including its `_:` prefix.
	BlankNodeLabel(BlankIdBuf),
	/// Punctuation mark.
	Punct(Punct),
	/// Compact IRI, as a `(prefix, span)` pair followed by a
	/// `(suffix, span)` pair. The separating `:` is part of neither string.
	CompactIri((String, Span), (String, Span)),
	/// Numeric literal.
	Numeric(NumericLiteral),
}
76
77impl fmt::Display for Token {
78	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
79		match self {
80			Self::Keyword(kw) => write!(f, "keyword `{kw}`"),
81			Self::Begin(d) => write!(f, "opening `{}`", d.begin()),
82			Self::End(d) => write!(f, "closing `{}`", d.end()),
83			Self::LangTag(tag) => write!(f, "language tag `{tag}`"),
84			Self::IriRef(iri_ref) => write!(f, "IRI reference <{iri_ref}>"),
85			Self::StringLiteral(string) => {
86				write!(f, "string literal \"{}\"", DisplayStringLiteral(string))
87			}
88			Self::BlankNodeLabel(label) => write!(f, "blank node label `{label}`"),
89			Self::Punct(p) => p.fmt(f),
90			Self::CompactIri((prefix, _), (suffix, _)) => {
91				write!(f, "compact IRI `{prefix}:{suffix}`")
92			}
93			Self::Numeric(n) => write!(f, "numeric literal `{n}`"),
94		}
95	}
96}
97
/// Wrapper that displays a string the way it would be written inside a
/// string literal, i.e. with special characters escaped.
pub struct DisplayStringLiteral<'a>(pub &'a str);

impl fmt::Display for DisplayStringLiteral<'_> {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		self.0.chars().try_for_each(|ch| {
			// Characters with a dedicated escape are written as that escape;
			// every other character is displayed as is.
			let escape = match ch {
				'"' => "\\u0022",
				'\\' => "\\u005c",
				'\n' => "\\n",
				'\r' => "\\r",
				'\t' => "\\t",
				'\u{08}' => "\\b",
				'\u{0c}' => "\\f",
				plain => return fmt::Display::fmt(&plain, f),
			};

			f.write_str(escape)
		})
	}
}
119
/// Keyword recognized by the lexer.
#[derive(Debug)]
pub enum Keyword {
	/// `a`.
	A,
	/// `@prefix`.
	Prefix,
	/// `@base`.
	Base,
	/// `BASE` (matched case-insensitively).
	SparqlBase,
	/// `PREFIX` (matched case-insensitively).
	SparqlPrefix,
	/// `true`.
	True,
	/// `false`.
	False,
}

/// Error returned by [`Keyword::from_str`] when the input is not a keyword.
///
/// Implements `Debug` so that `Result<Keyword, NotAKeyword>` can be used with
/// `unwrap`, `expect` and the assertion macros.
#[derive(Clone, Debug)]
pub struct NotAKeyword;

impl FromStr for Keyword {
	type Err = NotAKeyword;

	/// Parses a keyword.
	///
	/// `a`, `true`, `false`, `@prefix` and `@base` are matched exactly, while
	/// `BASE` and `PREFIX` are matched ignoring ASCII case.
	///
	/// # Errors
	///
	/// Returns [`NotAKeyword`] for any other input.
	fn from_str(s: &str) -> Result<Self, Self::Err> {
		match s {
			"a" => Ok(Self::A),
			"true" => Ok(Self::True),
			"false" => Ok(Self::False),
			"@prefix" => Ok(Self::Prefix),
			"@base" => Ok(Self::Base),
			// The `@` forms above never collide with these: they have a
			// different length.
			_ if s.eq_ignore_ascii_case("BASE") => Ok(Self::SparqlBase),
			_ if s.eq_ignore_ascii_case("PREFIX") => Ok(Self::SparqlPrefix),
			_ => Err(NotAKeyword),
		}
	}
}

/// Writes the keyword in its canonical spelling.
impl fmt::Display for Keyword {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self {
			Self::A => write!(f, "a"),
			Self::Prefix => write!(f, "@prefix"),
			Self::Base => write!(f, "@base"),
			Self::SparqlBase => write!(f, "BASE"),
			Self::SparqlPrefix => write!(f, "PREFIX"),
			Self::True => write!(f, "true"),
			Self::False => write!(f, "false"),
		}
	}
}
171
/// Kind of paired delimiter.
#[derive(Debug)]
pub enum Delimiter {
	/// `(` and `)`.
	Parenthesis,
	/// `[` and `]`.
	Bracket,
}

impl Delimiter {
	/// Opening and closing characters of this delimiter, in that order.
	fn chars(&self) -> (char, char) {
		match self {
			Self::Parenthesis => ('(', ')'),
			Self::Bracket => ('[', ']'),
		}
	}

	/// Returns the character that opens this delimiter.
	pub fn begin(&self) -> char {
		let (opening, _) = self.chars();
		opening
	}

	/// Returns the character that closes this delimiter.
	pub fn end(&self) -> char {
		let (_, closing) = self.chars();
		closing
	}
}
193
/// Punctuation mark.
#[derive(Debug)]
pub enum Punct {
	/// `.`
	Period,
	/// `;`
	Semicolon,
	/// `,`
	Comma,
	/// `^^`
	Carets,
}

/// Writes the name of the punctuation mark followed by the mark itself.
impl fmt::Display for Punct {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		let description = match self {
			Self::Period => "dot `.`",
			Self::Semicolon => "semicolon `;`",
			Self::Comma => "comma `,`",
			Self::Carets => "carets `^^`",
		};

		f.write_str(description)
	}
}
212
213/// Lexer position.
214struct Position {
215	span: Span,
216	last_span: Span,
217}
218
219impl Position {
220	fn current(&self) -> Span {
221		self.span
222	}
223
224	fn end(&self) -> Span {
225		self.span.end.into()
226	}
227
228	fn last(&self) -> Span {
229		self.last_span
230	}
231}
232
233/// Lexer.
234///
235/// Changes a character iterator into a `Token` iterator.
236pub struct Lexer<C: Iterator<Item = Result<DecodedChar, E>>, E> {
237	chars: Peekable<C>,
238	pos: Position,
239	lookahead: Option<(Token, Span)>,
240}
241
242impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
243	pub fn new(chars: C) -> Self {
244		Self {
245			chars: chars.peekable(),
246			pos: Position {
247				span: Span::default(),
248				last_span: Span::default(),
249			},
250			lookahead: None,
251		}
252	}
253}
254
/// What can follow an `@` character: a keyword or a language tag.
enum LanguageTagOrKeyword {
	/// `@prefix` or `@base`.
	Keyword(Keyword),
	/// Any other valid language tag.
	LanguageTag(LangTagBuf),
}
259
/// Result of lexing a name: either a keyword or a compact IRI.
enum NameOrKeyword {
	/// The name is a keyword.
	Keyword(Keyword),
	/// Compact IRI, as `(prefix, span)` followed by `(suffix, span)`.
	CompactIri((String, Span), (String, Span)),
}
264
/// Result of lexing something that starts like a number: a numeric literal,
/// or a lone `.`.
enum NumericOrPeriod {
	/// Numeric literal.
	Numeric(NumericLiteral),
	/// A `.` that is not followed by a digit.
	Period,
}
269
270impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
271	fn peek_char(&mut self) -> Result<Option<char>, (Error<E>, Span)> {
272		match self.chars.peek() {
273			None => Ok(None),
274			Some(Ok(c)) => Ok(Some(c.chr())),
275			Some(Err(_)) => self.next_char(),
276		}
277	}
278
279	fn next_char(&mut self) -> Result<Option<char>, (Error<E>, Span)> {
280		match self.chars.next() {
281			None => Ok(None),
282			Some(Ok(c)) => {
283				self.pos.span.push(c.len());
284				self.pos.last_span.clear();
285				self.pos.last_span.push(c.len());
286				Ok(Some(c.chr()))
287			}
288			Some(Err(e)) => Err((Error::Stream(e), self.pos.end())),
289		}
290	}
291
292	fn expect_char(&mut self) -> Result<char, (Error<E>, Span)> {
293		self.next_char()?
294			.ok_or_else(|| (Error::Unexpected(Unexpected::EndOfFile), self.pos.end()))
295	}
296
297	fn skip_whitespaces(&mut self) -> Result<(), (Error<E>, Span)> {
298		while let Some(c) = self.peek_char()? {
299			if c.is_whitespace() {
300				self.next_char()?;
301			} else if c == '#' {
302				self.next_comment()?;
303			} else {
304				break;
305			}
306		}
307
308		self.pos.span.clear();
309		Ok(())
310	}
311
312	/// Parses the rest of a comment, after the first `#` character.
313	///
314	/// Comments in N-Quads take the form of `#`,
315	/// outside an IRIREF or STRING_LITERAL_QUOTE,
316	/// and continue to the end of line (EOL) or end of file
317	/// if there is no end of line after the comment marker.
318	fn next_comment(&mut self) -> Result<(), (Error<E>, Span)> {
319		loop {
320			if matches!(self.next_char()?, None | Some('\n')) {
321				break Ok(());
322			}
323		}
324	}
325
326	/// Parses the rest of a lang tag, after the first `@` character.
327	fn next_langtag_or_keyword(
328		&mut self,
329	) -> Result<(LanguageTagOrKeyword, Span), (Error<E>, Span)> {
330		let mut tag = String::new();
331
332		loop {
333			match self.peek_char()? {
334				None => {
335					if tag.is_empty() {
336						return Err((Error::InvalidLangTag, self.pos.current()));
337					} else {
338						break;
339					}
340				}
341				Some(c) => {
342					if c.is_ascii_alphabetic() {
343						tag.push(self.expect_char()?);
344					} else if tag.is_empty() {
345						return Err((Error::InvalidLangTag, self.pos.current()));
346					} else {
347						break;
348					}
349				}
350			}
351		}
352
353		let mut empty_subtag = true;
354		if let Some('-') = self.peek_char()? {
355			tag.push(self.expect_char()?);
356			loop {
357				match self.peek_char()? {
358					Some('-') if !empty_subtag => tag.push(self.expect_char()?),
359					Some(c) if c.is_ascii_alphanumeric() => {
360						empty_subtag = false;
361						tag.push(self.expect_char()?)
362					}
363					Some(c) => {
364						if c.is_whitespace() {
365							if empty_subtag {
366								return Err((Error::InvalidLangTag, self.pos.current()));
367							} else {
368								break;
369							}
370						} else {
371							self.next_char()?;
372							return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
373						}
374					}
375					None => {
376						if empty_subtag {
377							return Err((Error::InvalidLangTag, self.pos.current()));
378						} else {
379							break;
380						}
381					}
382				}
383			}
384		}
385
386		match tag.as_str() {
387			"prefix" => Ok((
388				LanguageTagOrKeyword::Keyword(Keyword::Prefix),
389				self.pos.current(),
390			)),
391			"base" => Ok((
392				LanguageTagOrKeyword::Keyword(Keyword::Base),
393				self.pos.current(),
394			)),
395			_ => match LangTagBuf::new(tag) {
396				Ok(tag) => Ok((LanguageTagOrKeyword::LanguageTag(tag), self.pos.current())),
397				Err(_) => Err((Error::InvalidLangTag, self.pos.current())),
398			},
399		}
400	}
401
402	/// Parses an IRI reference, starting after the first `<` until the closing
403	/// `>`.
404	fn next_iriref(&mut self) -> Result<(IriRefBuf, Span), (Error<E>, Span)> {
405		let mut iriref = String::new();
406
407		loop {
408			match self.next_char()? {
409				Some('>') => break,
410				Some('\\') => {
411					let span = self.pos.last();
412					let c = match self.next_char()? {
413						Some('u') => self.next_hex_char(span, 4)?,
414						Some('U') => self.next_hex_char(span, 8)?,
415						unexpected => {
416							return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
417						}
418					};
419
420					iriref.push(c)
421				}
422				Some(c) => {
423					if matches!(
424						c,
425						'\u{00}'..='\u{20}' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\'
426					) {
427						return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
428					}
429
430					iriref.push(c)
431				}
432				None => return Err((Error::Unexpected(Unexpected::EndOfFile), self.pos.end())),
433			}
434		}
435
436		match IriRefBuf::new(iriref) {
437			Ok(iriref) => Ok((iriref, self.pos.current())),
438			// NOTE Dropped string
439			Err(e) => Err((Error::InvalidIriRef(e), self.pos.current())),
440		}
441	}
442
443	fn next_hex_char(&mut self, mut span: Span, len: u8) -> Result<char, (Error<E>, Span)> {
444		let mut codepoint = 0;
445
446		for _ in 0..len {
447			let c = self.expect_char()?;
448			match c.to_digit(16) {
449				Some(d) => codepoint = (codepoint << 4) | d,
450				None => return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last())),
451			}
452		}
453
454		// NOTE set_end method had a check
455		span.end = self.pos.current().end;
456		match char::try_from(codepoint) {
457			Ok(c) => Ok(c),
458			Err(_) => Err((Error::InvalidCodepoint(codepoint), span)),
459		}
460	}
461
462	/// Parses a string literal, starting after the first `"` until the closing
463	/// `"`.
464	fn next_string_literal(&mut self, delimiter: char) -> Result<(String, Span), (Error<E>, Span)> {
465		let mut string = String::new();
466
467		let mut long = false;
468
469		loop {
470			match self.next_char()? {
471				Some(c) if c == delimiter => {
472					if !long {
473						if string.is_empty() && self.peek_char()? == Some(delimiter) {
474							self.next_char()?;
475							long = true;
476						} else {
477							break;
478						}
479					} else if self.peek_char()? == Some(delimiter) {
480						self.next_char()?;
481						if self.peek_char()? == Some(delimiter) {
482							self.next_char()?;
483							break;
484						} else {
485							string.push(delimiter);
486							string.push(delimiter);
487						}
488					} else {
489						string.push(delimiter);
490					}
491				}
492				Some('\\') => {
493					let span = self.pos.last();
494					let c = match self.next_char()? {
495						Some('u') => self.next_hex_char(span, 4)?,
496						Some('U') => self.next_hex_char(span, 8)?,
497						Some('t') => '\t',
498						Some('b') => '\u{08}',
499						Some('n') => '\n',
500						Some('r') => '\r',
501						Some('f') => '\u{0c}',
502						Some('\'') => '\'',
503						Some('"') => '"',
504						Some('\\') => '\\',
505						unexpected => {
506							return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
507						}
508					};
509
510					string.push(c)
511				}
512				Some(c) => {
513					// if !long && matches!(c, '\n' | '\r') {
514					// 	return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
515					// }
516					string.push(c)
517				}
518				None => return Err((Error::Unexpected(Unexpected::EndOfFile), self.pos.end())),
519			}
520		}
521
522		Ok((string, self.pos.current()))
523	}
524
	/// Parses a numeric literal, or a lone `.`, whose first character `first`
	/// has already been read.
	///
	/// `first` must be `+`, `-`, `.` or an ASCII digit.
	///
	/// The literal is recognized by a small state machine that looks at the
	/// next character without consuming it, picks the state reached *after*
	/// that character, and only then consumes it. The final state selects the
	/// kind of literal: integer, decimal or double.
	///
	/// Because only one character of lookahead is available, an integer
	/// directly followed by `.` (as in `1.` at the end of a statement) is
	/// committed to the decimal form and reported as an error when no digit
	/// or exponent follows.
	///
	/// # Panics
	///
	/// Panics if `first` is not one of the characters listed above.
	fn next_numeric_or_dot(
		&mut self,
		first: char,
	) -> Result<(NumericOrPeriod, Span), (Error<E>, Span)> {
		let mut buffer: String = first.into();

		enum State {
			/// A sign has been read: a digit or `.` must follow.
			NonEmptyInteger,
			/// At least one digit has been read: the buffer is an integer.
			Integer,
			/// Only a leading `.` has been read: a digit must follow for it
			/// to be a number, otherwise it is a period.
			NonENonEmptyDecimal,
			/// A `.` has been read after a sign or digits: a digit or an
			/// exponent marker must follow.
			NonEmptyDecimal,
			/// At least one fractional digit has been read: the buffer is a
			/// decimal.
			Decimal,
			/// An exponent marker has been read: a sign or a digit must
			/// follow.
			ExponentSign,
			/// An exponent sign has been read: a digit must follow.
			NonEmptyExponent,
			/// At least one exponent digit has been read: the buffer is a
			/// double.
			Exponent,
		}

		let mut state = match first {
			'+' => State::NonEmptyInteger,
			'-' => State::NonEmptyInteger,
			'.' => State::NonENonEmptyDecimal,
			'0'..='9' => State::Integer,
			_ => panic!("invalid first numeric character"),
		};

		loop {
			state = match state {
				State::NonEmptyInteger => match self.peek_char()? {
					Some('0'..='9') => State::Integer,
					Some('.') => State::NonEmptyDecimal,
					unexpected => {
						return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
					}
				},
				State::Integer => match self.peek_char()? {
					Some('0'..='9') => State::Integer,
					Some('.') => State::NonEmptyDecimal,
					Some('e' | 'E') => State::ExponentSign,
					_ => break,
				},
				State::NonENonEmptyDecimal => match self.peek_char()? {
					Some('0'..='9') => State::Decimal,
					// A `.` that does not start a number is the period token.
					_ => return Ok((NumericOrPeriod::Period, self.pos.current())),
				},
				// NOTE(review): this state is also reached from a bare sign
				// (`+.`), so `+.e5` is accepted without any digit before the
				// exponent — confirm this is intended.
				State::NonEmptyDecimal => match self.peek_char()? {
					Some('0'..='9') => State::Decimal,
					Some('e' | 'E') => State::ExponentSign,
					unexpected => {
						return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
					}
				},
				State::Decimal => match self.peek_char()? {
					Some('0'..='9') => State::Decimal,
					Some('e' | 'E') => State::ExponentSign,
					_ => break,
				},
				State::ExponentSign => match self.peek_char()? {
					Some('+' | '-') => State::NonEmptyExponent,
					Some('0'..='9') => State::Exponent,
					unexpected => {
						return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
					}
				},
				State::NonEmptyExponent => match self.peek_char()? {
					Some('0'..='9') => State::Exponent,
					unexpected => {
						return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
					}
				},
				State::Exponent => match self.peek_char()? {
					Some('0'..='9') => State::Exponent,
					_ => break,
				},
			};

			// The peeked character was accepted by the transition above.
			buffer.push(self.expect_char()?);
		}

		// The loop is only left through `break` in the `Integer`, `Decimal`
		// and `Exponent` states, so the other states cannot be observed here.
		//
		// SAFETY (all three): the buffer only contains characters accepted by
		// the state machine above; this assumes the lexical forms accepted
		// here satisfy the invariants of the `*Buf` types — TODO confirm.
		let n = match state {
			State::Integer => NumericLiteral::Integer(unsafe { IntegerBuf::new_unchecked(buffer) }),
			State::Decimal => NumericLiteral::Decimal(unsafe { DecimalBuf::new_unchecked(buffer) }),
			State::Exponent => NumericLiteral::Double(unsafe { DoubleBuf::new_unchecked(buffer) }),
			_ => unreachable!(),
		};

		Ok((NumericOrPeriod::Numeric(n), self.pos.current()))
	}
614
615	/// Parses a blank node label, starting after the first `_`.
616	fn next_blank_node_label(&mut self) -> Result<(BlankIdBuf, Span), (Error<E>, Span)> {
617		match self.next_char()? {
618			Some(':') => {
619				let mut label = String::new();
620				label.push('_');
621				label.push(':');
622				match self.next_char()? {
623					Some(c) if c.is_ascii_digit() || is_pn_chars_u(c) => {
624						label.push(c);
625						let mut last_is_pn_chars = true;
626						loop {
627							match self.peek_char()? {
628								Some(c) if is_pn_chars(c) => {
629									label.push(self.expect_char()?);
630									last_is_pn_chars = true
631								}
632								Some('.') => {
633									label.push(self.expect_char()?);
634									last_is_pn_chars = false;
635								}
636								_ if last_is_pn_chars => break,
637								unexpected => {
638									return Err((
639										Error::Unexpected(unexpected.into()),
640										self.pos.last(),
641									))
642								}
643							}
644						}
645
646						Ok((
647							unsafe { BlankIdBuf::new_unchecked(label) },
648							self.pos.current(),
649						))
650					}
651					unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
652				}
653			}
654			unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
655		}
656	}
657
658	fn next_escape(&mut self) -> Result<char, (Error<E>, Span)> {
659		match self.next_char()? {
660			Some(
661				c @ ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
662				| ';' | '=' | '/' | '?' | '#' | '@' | '%'),
663			) => Ok(c),
664			unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
665		}
666	}
667
668	fn next_name_or_keyword(&mut self, c: char) -> Result<(NameOrKeyword, Span), (Error<E>, Span)> {
669		// PNAME_NS or Keyword
670		let namespace = match c {
671			':' => (String::new(), self.pos.current()),
672			c if is_pn_chars_base(c) => {
673				let mut namespace = String::new();
674				namespace.push(c);
675				let mut last_is_pn_chars = true;
676				let span = loop {
677					match self.peek_char()? {
678						Some(c) if is_pn_chars(c) => {
679							namespace.push(self.expect_char()?);
680							last_is_pn_chars = true
681						}
682						Some('.') => {
683							namespace.push(self.expect_char()?);
684							last_is_pn_chars = false;
685						}
686						Some(':') if last_is_pn_chars => {
687							let span = self.pos.current();
688							self.expect_char()?;
689							break span;
690						}
691						unexpected => {
692							return if unexpected.map(|c| c.is_whitespace()).unwrap_or(true) {
693								match Keyword::from_str(&namespace) {
694									Ok(kw) => Ok((NameOrKeyword::Keyword(kw), self.pos.current())),
695									Err(NotAKeyword) => break self.pos.current(),
696								}
697							} else {
698								Err((Error::Unexpected(unexpected.into()), self.pos.end()))
699							}
700						}
701					}
702				};
703
704				(namespace, span)
705			}
706			unexpected => {
707				return Err((
708					Error::Unexpected(Unexpected::Char(unexpected)),
709					self.pos.last(),
710				))
711			}
712		};
713
714		// PN_LOCAL
715		let mut suffix = String::new();
716		let mut suffix_span = self.pos.current().next();
717		match self.peek_char()? {
718			Some(c) if is_pn_chars_u(c) || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') => {
719				let c = match self.expect_char()? {
720					'%' => {
721						// percent encoded.
722						self.next_hex_char(self.pos.current().end.into(), 2)?
723					}
724					'\\' => {
725						// escape sequence.
726						self.next_escape()?
727					}
728					c => c,
729				};
730
731				suffix.push(c);
732
733				loop {
734					match self.peek_char()? {
735						Some(c)
736							if is_pn_chars(c)
737								|| c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') =>
738						{
739							let c = match self.expect_char()? {
740								'%' => {
741									// percent encoded.
742									self.next_hex_char(self.pos.current().end.into(), 2)?
743								}
744								'\\' => {
745									// escape sequence.
746									self.next_escape()?
747								}
748								c => c,
749							};
750
751							suffix.push(c);
752						}
753						_ => {
754							// NOTE set_end had a check
755							suffix_span.end = self.pos.current().end;
756							break Ok((
757								NameOrKeyword::CompactIri(namespace, (suffix, suffix_span)),
758								self.pos.current(),
759							));
760						}
761					}
762				}
763			}
764			_ => Ok((
765				NameOrKeyword::CompactIri(namespace, (String::new(), self.pos.current())),
766				self.pos.current(),
767			)),
768		}
769	}
770
771	pub fn consume(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
772		self.skip_whitespaces()?;
773		match self.next_char()? {
774			Some('@') => Ok({
775				let t = self.next_langtag_or_keyword()?;
776				let token = match t.0 {
777					LanguageTagOrKeyword::LanguageTag(tag) => Token::LangTag(tag),
778					LanguageTagOrKeyword::Keyword(kw) => Token::Keyword(kw),
779				};
780
781				(Some(token), t.1)
782			}),
783			Some('<') => Ok({
784				let t = self.next_iriref()?;
785				(Some(Token::IriRef(t.0)), t.1)
786			}),
787			Some('"') => Ok({
788				let t = self.next_string_literal('"')?;
789
790				(Some(Token::StringLiteral(t.0)), t.1)
791			}),
792			Some('\'') => Ok({
793				let t = self.next_string_literal('\'')?;
794
795				(Some(Token::StringLiteral(t.0)), t.1)
796			}),
797			Some('_') => Ok({
798				let t = self.next_blank_node_label()?;
799
800				(Some(Token::BlankNodeLabel(t.0)), t.1)
801			}),
802			Some(',') => Ok((Some(Token::Punct(Punct::Comma)), self.pos.current())),
803			Some(';') => Ok((Some(Token::Punct(Punct::Semicolon)), self.pos.current())),
804			Some('^') => match self.next_char()? {
805				Some('^') => Ok((Some(Token::Punct(Punct::Carets)), self.pos.current())),
806				unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
807			},
808			Some('(') => Ok((
809				Some(Token::Begin(Delimiter::Parenthesis)),
810				self.pos.current(),
811			)),
812			Some('[') => Ok((Some(Token::Begin(Delimiter::Bracket)), self.pos.current())),
813			Some(')') => Ok((Some(Token::End(Delimiter::Parenthesis)), self.pos.current())),
814			Some(']') => Ok((Some(Token::End(Delimiter::Bracket)), self.pos.current())),
815			Some(c @ ('+' | '-' | '0'..='9' | '.')) => Ok({
816				let t = self.next_numeric_or_dot(c)?;
817
818				let token = match t.0 {
819					NumericOrPeriod::Numeric(n) => Token::Numeric(n),
820					NumericOrPeriod::Period => Token::Punct(Punct::Period),
821				};
822
823				(Some(token), t.1)
824			}),
825			Some(c) => Ok({
826				let t = self.next_name_or_keyword(c)?;
827
828				let token = match t.0 {
829					NameOrKeyword::Keyword(kw) => Token::Keyword(kw),
830					NameOrKeyword::CompactIri(p, s) => Token::CompactIri(p, s),
831				};
832
833				(Some(token), t.1)
834			}),
835			None => Ok((None, self.pos.end())),
836		}
837	}
838
839	#[allow(clippy::type_complexity)]
840	pub fn peek(&mut self) -> Result<(Option<&Token>, Span), (Error<E>, Span)> {
841		if self.lookahead.is_none() {
842			if let (Some(token), loc) = self.consume()? {
843				self.lookahead = Some((token, loc));
844			}
845		}
846
847		match &self.lookahead {
848			Some((token, loc)) => Ok((Some(token), *loc)),
849			None => Ok((None, self.pos.end())),
850		}
851	}
852
853	#[allow(clippy::type_complexity, clippy::should_implement_trait)]
854	pub fn next(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
855		match self.lookahead.take() {
856			Some((token, loc)) => Ok((Some(token), loc)),
857			None => self.consume(),
858		}
859	}
860}
861
862impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Tokens for Lexer<C, E> {
863	type Error = Error<E>;
864
865	fn peek(&mut self) -> Result<(Option<&Token>, Span), (Error<E>, Span)> {
866		self.peek()
867	}
868
869	fn next(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
870		self.next()
871	}
872
873	fn last(&self) -> Span {
874		self.pos.last_span
875	}
876}
877
878impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Iterator for Lexer<C, E> {
879	type Item = Result<(Token, Span), (Error<E>, Span)>;
880
881	fn next(&mut self) -> Option<Self::Item> {
882		match self.next() {
883			Ok((Some(token), loc)) => Some(Ok((token, loc))),
884			Ok((None, _)) => None,
885			Err(e) => Some(Err(e)),
886		}
887	}
888}
889
/// Returns `true` if `c` is a base name character: an ASCII letter or a
/// character in one of the non-ASCII ranges listed below.
fn is_pn_chars_base(c: char) -> bool {
	/// Inclusive ranges of accepted characters.
	const RANGES: &[(char, char)] = &[
		('A', 'Z'),
		('a', 'z'),
		('\u{00c0}', '\u{00d6}'),
		('\u{00d8}', '\u{00f6}'),
		('\u{00f8}', '\u{02ff}'),
		('\u{0370}', '\u{037d}'),
		('\u{037f}', '\u{1fff}'),
		('\u{200c}', '\u{200d}'),
		('\u{2070}', '\u{218f}'),
		('\u{2c00}', '\u{2fef}'),
		('\u{3001}', '\u{d7ff}'),
		('\u{f900}', '\u{fdcf}'),
		('\u{fdf0}', '\u{fffd}'),
		('\u{10000}', '\u{effff}'),
	];

	RANGES.iter().any(|&(first, last)| first <= c && c <= last)
}
893
894fn is_pn_chars_u(c: char) -> bool {
895	is_pn_chars_base(c) || c == '_'
896}
897
898fn is_pn_chars(c: char) -> bool {
899	is_pn_chars_u(c)
900		|| matches!(c, '-' | '0'..='9' | '\u{00b7}' | '\u{0300}'..='\u{036f}' | '\u{203f}'..='\u{2040}')
901}