turtle_syntax/
lexing.rs

1use crate::{BlankIdBuf, DecimalBuf, DoubleBuf, IntegerBuf, NumericLiteral};
2use decoded_char::DecodedChar;
3use iref::IriRefBuf;
4use langtag::LanguageTagBuf;
5use locspan::{Meta, Span};
6use std::fmt;
7use std::iter::Peekable;
8use std::str::FromStr;
9
10/// Fallible tokens iterator with lookahead.
11pub trait Tokens {
12	type Error;
13
14	#[allow(clippy::type_complexity)]
15	fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Self::Error, Span>>;
16
17	#[allow(clippy::type_complexity)]
18	fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Self::Error, Span>>;
19
20	/// Returns the span of the last parsed token.
21	fn last(&self) -> Span;
22}
23
24/// Unexpected char or end of file.
25#[derive(Debug, thiserror::Error)]
26pub enum Unexpected {
27	#[error("unexpected character `{0}`")]
28	Char(char),
29
30	#[error("unexpected end of file")]
31	EndOfFile,
32}
33
34impl From<Option<char>> for Unexpected {
35	fn from(value: Option<char>) -> Self {
36		match value {
37			Some(c) => Self::Char(c),
38			None => Self::EndOfFile,
39		}
40	}
41}
42
43/// Lexing error.
44#[derive(Debug, thiserror::Error)]
45pub enum Error<E = std::convert::Infallible> {
46	#[error("invalid language tag")]
47	InvalidLangTag,
48
49	#[error("invalid character code point {0:x}")]
50	InvalidCodepoint(u32),
51
52	#[error("invalid IRI reference <{0}>: {1}")]
53	InvalidIriRef(iref::Error, String),
54
55	#[error(transparent)]
56	Unexpected(Unexpected),
57
58	#[error(transparent)]
59	Stream(E),
60}
61
62/// Token.
63#[derive(Debug)]
64pub enum Token {
65	Keyword(Keyword),
66	Begin(Delimiter),
67	End(Delimiter),
68	LangTag(LanguageTagBuf),
69	IriRef(IriRefBuf),
70	StringLiteral(String),
71	BlankNodeLabel(BlankIdBuf),
72	Punct(Punct),
73	CompactIri((String, Span), (String, Span)),
74	Numeric(NumericLiteral),
75}
76
77impl fmt::Display for Token {
78	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
79		match self {
80			Self::Keyword(kw) => write!(f, "keyword `{kw}`"),
81			Self::Begin(d) => write!(f, "opening `{}`", d.begin()),
82			Self::End(d) => write!(f, "closing `{}`", d.end()),
83			Self::LangTag(tag) => write!(f, "language tag `{tag}`"),
84			Self::IriRef(iri_ref) => write!(f, "IRI reference <{iri_ref}>"),
85			Self::StringLiteral(string) => {
86				write!(f, "string literal \"{}\"", DisplayStringLiteral(string))
87			}
88			Self::BlankNodeLabel(label) => write!(f, "blank node label `{label}`"),
89			Self::Punct(p) => p.fmt(f),
90			Self::CompactIri((prefix, _), (suffix, _)) => {
91				write!(f, "compact IRI `{prefix}:{suffix}`")
92			}
93			Self::Numeric(n) => write!(f, "numeric literal `{n}`"),
94		}
95	}
96}
97
/// Displays the wrapped string with the characters that cannot appear
/// verbatim in a quoted literal replaced by escape sequences.
pub struct DisplayStringLiteral<'a>(pub &'a str);

impl<'a> fmt::Display for DisplayStringLiteral<'a> {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		self.0.chars().try_for_each(|c| {
			let escaped = match c {
				'"' => "\\u0022",
				'\\' => "\\u005c",
				'\n' => "\\n",
				'\r' => "\\r",
				'\t' => "\\t",
				'\u{08}' => "\\b",
				'\u{0c}' => "\\f",
				// Any other character is displayed as is.
				other => return fmt::Display::fmt(&other, f),
			};

			f.write_str(escaped)
		})
	}
}
119
/// Keyword.
#[derive(Debug)]
pub enum Keyword {
	/// `a`
	A,

	/// `@prefix`
	Prefix,

	/// `@base`
	Base,

	/// `BASE`
	SparqlBase,

	/// `PREFIX`
	SparqlPrefix,

	/// `true`
	True,

	/// `false`
	False,
}
130
/// Error returned when parsing a string that is not a [`Keyword`].
#[derive(Clone)]
pub struct NotAKeyword;
133
134impl FromStr for Keyword {
135	type Err = NotAKeyword;
136
137	fn from_str(s: &str) -> Result<Self, Self::Err> {
138		if s == "a" {
139			Ok(Self::A)
140		} else if s == "true" {
141			Ok(Self::True)
142		} else if s == "false" {
143			Ok(Self::False)
144		} else if s == unicase::Ascii::new("BASE") {
145			Ok(Self::SparqlBase)
146		} else if s == unicase::Ascii::new("PREFIX") {
147			Ok(Self::SparqlPrefix)
148		} else if s == "@prefix" {
149			Ok(Self::Prefix)
150		} else if s == "@base" {
151			Ok(Self::Base)
152		} else {
153			Err(NotAKeyword)
154		}
155	}
156}
157
158impl fmt::Display for Keyword {
159	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
160		match self {
161			Self::A => write!(f, "a"),
162			Self::Prefix => write!(f, "@prefix"),
163			Self::Base => write!(f, "@base"),
164			Self::SparqlBase => write!(f, "BASE"),
165			Self::SparqlPrefix => write!(f, "PREFIX"),
166			Self::True => write!(f, "true"),
167			Self::False => write!(f, "false"),
168		}
169	}
170}
171
/// Kind of delimiter pair.
#[derive(Debug)]
pub enum Delimiter {
	/// `(` and `)`.
	Parenthesis,

	/// `[` and `]`.
	Bracket,
}

impl Delimiter {
	/// Opening and closing characters of this delimiter, in that order.
	fn pair(&self) -> (char, char) {
		match self {
			Self::Parenthesis => ('(', ')'),
			Self::Bracket => ('[', ']'),
		}
	}

	/// Returns the opening character.
	pub fn begin(&self) -> char {
		self.pair().0
	}

	/// Returns the closing character.
	pub fn end(&self) -> char {
		self.pair().1
	}
}
193
/// Punctuation mark.
#[derive(Debug)]
pub enum Punct {
	/// `.`
	Period,

	/// `;`
	Semicolon,

	/// `,`
	Comma,

	/// `^^`
	Carets,
}

impl fmt::Display for Punct {
	/// Writes the name of the mark followed by the mark itself.
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		let description = match self {
			Self::Period => "dot `.`",
			Self::Semicolon => "semicolon `;`",
			Self::Comma => "comma `,`",
			Self::Carets => "carets `^^`",
		};

		f.write_str(description)
	}
}
212
213/// Lexer position.
214struct Position {
215	span: Span,
216	last_span: Span,
217}
218
219impl Position {
220	fn current(&self) -> Span {
221		self.span
222	}
223
224	fn end(&self) -> Span {
225		self.span.end().into()
226	}
227
228	fn last(&self) -> Span {
229		self.last_span
230	}
231}
232
233/// Lexer.
234///
235/// Changes a character iterator into a `Token` iterator.
236pub struct Lexer<C: Iterator<Item = Result<DecodedChar, E>>, E> {
237	chars: Peekable<C>,
238	pos: Position,
239	lookahead: Option<Meta<Token, Span>>,
240}
241
242impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
243	pub fn new(chars: C) -> Self {
244		Self {
245			chars: chars.peekable(),
246			pos: Position {
247				span: Span::default(),
248				last_span: Span::default(),
249			},
250			lookahead: None,
251		}
252	}
253}
254
255enum LanguageTagOrKeyword {
256	Keyword(Keyword),
257	LanguageTag(LanguageTagBuf),
258}
259
260enum NameOrKeyword {
261	Keyword(Keyword),
262	CompactIri((String, Span), (String, Span)),
263}
264
265enum NumericOrPeriod {
266	Numeric(NumericLiteral),
267	Period,
268}
269
270impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
271	fn peek_char(&mut self) -> Result<Option<char>, Meta<Error<E>, Span>> {
272		match self.chars.peek() {
273			None => Ok(None),
274			Some(Ok(c)) => Ok(Some(c.chr())),
275			Some(Err(_)) => self.next_char(),
276		}
277	}
278
279	fn next_char(&mut self) -> Result<Option<char>, Meta<Error<E>, Span>> {
280		match self.chars.next() {
281			None => Ok(None),
282			Some(Ok(c)) => {
283				self.pos.span.push(c.len());
284				self.pos.last_span.clear();
285				self.pos.last_span.push(c.len());
286				Ok(Some(c.chr()))
287			}
288			Some(Err(e)) => Err(Meta(Error::Stream(e), self.pos.end())),
289		}
290	}
291
292	fn expect_char(&mut self) -> Result<char, Meta<Error<E>, Span>> {
293		self.next_char()?
294			.ok_or_else(|| Meta(Error::Unexpected(Unexpected::EndOfFile), self.pos.end()))
295	}
296
297	fn skip_whitespaces(&mut self) -> Result<(), Meta<Error<E>, Span>> {
298		while let Some(c) = self.peek_char()? {
299			if c.is_whitespace() {
300				self.next_char()?;
301			} else if c == '#' {
302				self.next_comment()?;
303			} else {
304				break;
305			}
306		}
307
308		self.pos.span.clear();
309		Ok(())
310	}
311
312	/// Parses the rest of a comment, after the first `#` character.
313	///
314	/// Comments in N-Quads take the form of `#`,
315	/// outside an IRIREF or STRING_LITERAL_QUOTE,
316	/// and continue to the end of line (EOL) or end of file
317	/// if there is no end of line after the comment marker.
318	fn next_comment(&mut self) -> Result<(), Meta<Error<E>, Span>> {
319		loop {
320			if matches!(self.next_char()?, None | Some('\n')) {
321				break Ok(());
322			}
323		}
324	}
325
326	/// Parses the rest of a lang tag, after the first `@` character.
327	fn next_langtag_or_keyword(
328		&mut self,
329	) -> Result<Meta<LanguageTagOrKeyword, Span>, Meta<Error<E>, Span>> {
330		let mut tag = String::new();
331
332		loop {
333			match self.peek_char()? {
334				None => {
335					if tag.is_empty() {
336						return Err(Meta(Error::InvalidLangTag, self.pos.current()));
337					} else {
338						break;
339					}
340				}
341				Some(c) => {
342					if c.is_ascii_alphabetic() {
343						tag.push(self.expect_char()?);
344					} else if tag.is_empty() {
345						return Err(Meta(Error::InvalidLangTag, self.pos.current()));
346					} else {
347						break;
348					}
349				}
350			}
351		}
352
353		let mut empty_subtag = true;
354		if let Some('-') = self.peek_char()? {
355			tag.push(self.expect_char()?);
356			loop {
357				match self.peek_char()? {
358					Some('-') if !empty_subtag => tag.push(self.expect_char()?),
359					Some(c) if c.is_ascii_alphanumeric() => {
360						empty_subtag = false;
361						tag.push(self.expect_char()?)
362					}
363					Some(c) => {
364						if c.is_whitespace() {
365							if empty_subtag {
366								return Err(Meta(Error::InvalidLangTag, self.pos.current()));
367							} else {
368								break;
369							}
370						} else {
371							self.next_char()?;
372							return Err(Meta(
373								Error::Unexpected(Unexpected::Char(c)),
374								self.pos.last(),
375							));
376						}
377					}
378					None => {
379						if empty_subtag {
380							return Err(Meta(Error::InvalidLangTag, self.pos.current()));
381						} else {
382							break;
383						}
384					}
385				}
386			}
387		}
388
389		match tag.as_str() {
390			"prefix" => Ok(Meta(
391				LanguageTagOrKeyword::Keyword(Keyword::Prefix),
392				self.pos.current(),
393			)),
394			"base" => Ok(Meta(
395				LanguageTagOrKeyword::Keyword(Keyword::Base),
396				self.pos.current(),
397			)),
398			_ => match LanguageTagBuf::new(tag.into_bytes()) {
399				Ok(tag) => Ok(Meta(
400					LanguageTagOrKeyword::LanguageTag(tag),
401					self.pos.current(),
402				)),
403				Err(_) => Err(Meta(Error::InvalidLangTag, self.pos.current())),
404			},
405		}
406	}
407
408	/// Parses an IRI reference, starting after the first `<` until the closing
409	/// `>`.
410	fn next_iriref(&mut self) -> Result<Meta<IriRefBuf, Span>, Meta<Error<E>, Span>> {
411		let mut iriref = String::new();
412
413		loop {
414			match self.next_char()? {
415				Some('>') => break,
416				Some('\\') => {
417					let span = self.pos.last();
418					let c = match self.next_char()? {
419						Some('u') => self.next_hex_char(span, 4)?,
420						Some('U') => self.next_hex_char(span, 8)?,
421						unexpected => {
422							return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
423						}
424					};
425
426					iriref.push(c)
427				}
428				Some(c) => {
429					if matches!(
430						c,
431						'\u{00}'..='\u{20}' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\'
432					) {
433						return Err(Meta(
434							Error::Unexpected(Unexpected::Char(c)),
435							self.pos.last(),
436						));
437					}
438
439					iriref.push(c)
440				}
441				None => {
442					return Err(Meta(
443						Error::Unexpected(Unexpected::EndOfFile),
444						self.pos.end(),
445					))
446				}
447			}
448		}
449
450		match IriRefBuf::from_string(iriref) {
451			Ok(iriref) => Ok(Meta(iriref, self.pos.current())),
452			Err((e, string)) => Err(Meta(Error::InvalidIriRef(e, string), self.pos.current())),
453		}
454	}
455
456	fn next_hex_char(&mut self, mut span: Span, len: u8) -> Result<char, Meta<Error<E>, Span>> {
457		let mut codepoint = 0;
458
459		for _ in 0..len {
460			let c = self.expect_char()?;
461			match c.to_digit(16) {
462				Some(d) => codepoint = codepoint << 4 | d,
463				None => {
464					return Err(Meta(
465						Error::Unexpected(Unexpected::Char(c)),
466						self.pos.last(),
467					))
468				}
469			}
470		}
471
472		span.set_end(self.pos.current().end());
473		match char::try_from(codepoint) {
474			Ok(c) => Ok(c),
475			Err(_) => Err(Meta(Error::InvalidCodepoint(codepoint), span)),
476		}
477	}
478
479	/// Parses a string literal, starting after the first `"` until the closing
480	/// `"`.
481	fn next_string_literal(
482		&mut self,
483		delimiter: char,
484	) -> Result<Meta<String, Span>, Meta<Error<E>, Span>> {
485		let mut string = String::new();
486
487		let mut long = false;
488
489		loop {
490			match self.next_char()? {
491				Some(c) if c == delimiter => {
492					if !long {
493						if string.is_empty() && self.peek_char()? == Some(delimiter) {
494							self.next_char()?;
495							long = true;
496						} else {
497							break;
498						}
499					} else if self.peek_char()? == Some(delimiter) {
500						self.next_char()?;
501						if self.peek_char()? == Some(delimiter) {
502							self.next_char()?;
503							break;
504						} else {
505							string.push(delimiter);
506							string.push(delimiter);
507						}
508					} else {
509						string.push(delimiter);
510					}
511				}
512				Some('\\') => {
513					let span = self.pos.last();
514					let c = match self.next_char()? {
515						Some('u') => self.next_hex_char(span, 4)?,
516						Some('U') => self.next_hex_char(span, 8)?,
517						Some('t') => '\t',
518						Some('b') => '\u{08}',
519						Some('n') => '\n',
520						Some('r') => '\r',
521						Some('f') => '\u{0c}',
522						Some('\'') => '\'',
523						Some('"') => '"',
524						Some('\\') => '\\',
525						unexpected => {
526							return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
527						}
528					};
529
530					string.push(c)
531				}
532				Some(c) => {
533					// if !long && matches!(c, '\n' | '\r') {
534					// 	return Err(Meta(Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
535					// }
536					string.push(c)
537				}
538				None => {
539					return Err(Meta(
540						Error::Unexpected(Unexpected::EndOfFile),
541						self.pos.end(),
542					))
543				}
544			}
545		}
546
547		Ok(Meta(string, self.pos.current()))
548	}
549
550	/// Parses an IRI reference, starting after the first `<` until the closing
551	/// `>`.
552	fn next_numeric_or_dot(
553		&mut self,
554		first: char,
555	) -> Result<Meta<NumericOrPeriod, Span>, Meta<Error<E>, Span>> {
556		let mut buffer: String = first.into();
557
558		enum State {
559			NonEmptyInteger,
560			Integer,
561			NonENonEmptyDecimal,
562			NonEmptyDecimal,
563			Decimal,
564			ExponentSign,
565			NonEmptyExponent,
566			Exponent,
567		}
568
569		let mut state = match first {
570			'+' => State::NonEmptyInteger,
571			'-' => State::NonEmptyInteger,
572			'.' => State::NonENonEmptyDecimal,
573			'0'..='9' => State::Integer,
574			_ => panic!("invalid first numeric character"),
575		};
576
577		loop {
578			state = match state {
579				State::NonEmptyInteger => match self.peek_char()? {
580					Some('0'..='9') => State::Integer,
581					Some('.') => State::NonEmptyDecimal,
582					unexpected => {
583						return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
584					}
585				},
586				State::Integer => match self.peek_char()? {
587					Some('0'..='9') => State::Integer,
588					Some('.') => State::NonEmptyDecimal,
589					Some('e' | 'E') => State::ExponentSign,
590					_ => break,
591				},
592				State::NonENonEmptyDecimal => match self.peek_char()? {
593					Some('0'..='9') => State::Decimal,
594					_ => return Ok(Meta(NumericOrPeriod::Period, self.pos.current())),
595				},
596				State::NonEmptyDecimal => match self.peek_char()? {
597					Some('0'..='9') => State::Decimal,
598					Some('e' | 'E') => State::ExponentSign,
599					unexpected => {
600						return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
601					}
602				},
603				State::Decimal => match self.peek_char()? {
604					Some('0'..='9') => State::Decimal,
605					Some('e' | 'E') => State::ExponentSign,
606					_ => break,
607				},
608				State::ExponentSign => match self.peek_char()? {
609					Some('+' | '-') => State::NonEmptyExponent,
610					Some('0'..='9') => State::Exponent,
611					unexpected => {
612						return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
613					}
614				},
615				State::NonEmptyExponent => match self.peek_char()? {
616					Some('0'..='9') => State::Exponent,
617					unexpected => {
618						return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
619					}
620				},
621				State::Exponent => match self.peek_char()? {
622					Some('0'..='9') => State::Exponent,
623					_ => break,
624				},
625			};
626
627			buffer.push(self.expect_char()?);
628		}
629
630		let n = match state {
631			State::Integer => NumericLiteral::Integer(unsafe { IntegerBuf::new_unchecked(buffer) }),
632			State::Decimal => NumericLiteral::Decimal(unsafe { DecimalBuf::new_unchecked(buffer) }),
633			State::Exponent => NumericLiteral::Double(unsafe { DoubleBuf::new_unchecked(buffer) }),
634			_ => unreachable!(),
635		};
636
637		Ok(Meta(NumericOrPeriod::Numeric(n), self.pos.current()))
638	}
639
640	/// Parses a blank node label, starting after the first `_`.
641	fn next_blank_node_label(&mut self) -> Result<Meta<BlankIdBuf, Span>, Meta<Error<E>, Span>> {
642		match self.next_char()? {
643			Some(':') => {
644				let mut label = String::new();
645				label.push('_');
646				label.push(':');
647				match self.next_char()? {
648					Some(c) if c.is_ascii_digit() || is_pn_chars_u(c) => {
649						label.push(c);
650						let mut last_is_pn_chars = true;
651						loop {
652							match self.peek_char()? {
653								Some(c) if is_pn_chars(c) => {
654									label.push(self.expect_char()?);
655									last_is_pn_chars = true
656								}
657								Some('.') => {
658									label.push(self.expect_char()?);
659									last_is_pn_chars = false;
660								}
661								_ if last_is_pn_chars => break,
662								unexpected => {
663									return Err(Meta(
664										Error::Unexpected(unexpected.into()),
665										self.pos.last(),
666									))
667								}
668							}
669						}
670
671						Ok(Meta(
672							unsafe { BlankIdBuf::new_unchecked(label) },
673							self.pos.current(),
674						))
675					}
676					unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
677				}
678			}
679			unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
680		}
681	}
682
683	fn next_escape(&mut self) -> Result<char, Meta<Error<E>, Span>> {
684		match self.next_char()? {
685			Some(
686				c @ ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
687				| ';' | '=' | '/' | '?' | '#' | '@' | '%'),
688			) => Ok(c),
689			unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
690		}
691	}
692
693	fn next_name_or_keyword(
694		&mut self,
695		c: char,
696	) -> Result<Meta<NameOrKeyword, Span>, Meta<Error<E>, Span>> {
697		// PNAME_NS or Keyword
698		let namespace = match c {
699			':' => (String::new(), self.pos.current()),
700			c if is_pn_chars_base(c) => {
701				let mut namespace = String::new();
702				namespace.push(c);
703				let mut last_is_pn_chars = true;
704				let span = loop {
705					match self.peek_char()? {
706						Some(c) if is_pn_chars(c) => {
707							namespace.push(self.expect_char()?);
708							last_is_pn_chars = true
709						}
710						Some('.') => {
711							namespace.push(self.expect_char()?);
712							last_is_pn_chars = false;
713						}
714						Some(':') if last_is_pn_chars => {
715							let span = self.pos.current();
716							self.expect_char()?;
717							break span;
718						}
719						unexpected => {
720							return if unexpected.map(|c| c.is_whitespace()).unwrap_or(true) {
721								match Keyword::from_str(&namespace) {
722									Ok(kw) => {
723										Ok(Meta(NameOrKeyword::Keyword(kw), self.pos.current()))
724									}
725									Err(NotAKeyword) => break self.pos.current(),
726								}
727							} else {
728								Err(Meta(Error::Unexpected(unexpected.into()), self.pos.end()))
729							}
730						}
731					}
732				};
733
734				(namespace, span)
735			}
736			unexpected => {
737				return Err(Meta(
738					Error::Unexpected(Unexpected::Char(unexpected)),
739					self.pos.last(),
740				))
741			}
742		};
743
744		// PN_LOCAL
745		let mut suffix = String::new();
746		let mut suffix_span = self.pos.current().next();
747		match self.peek_char()? {
748			Some(c) if is_pn_chars_u(c) || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') => {
749				let c = match self.expect_char()? {
750					'%' => {
751						// percent encoded.
752						self.next_hex_char(self.pos.current().end().into(), 2)?
753					}
754					'\\' => {
755						// escape sequence.
756						self.next_escape()?
757					}
758					c => c,
759				};
760
761				suffix.push(c);
762
763				loop {
764					match self.peek_char()? {
765						Some(c)
766							if is_pn_chars(c)
767								|| c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') =>
768						{
769							let c = match self.expect_char()? {
770								'%' => {
771									// percent encoded.
772									self.next_hex_char(self.pos.current().end().into(), 2)?
773								}
774								'\\' => {
775									// escape sequence.
776									self.next_escape()?
777								}
778								c => c,
779							};
780
781							suffix.push(c);
782						}
783						_ => {
784							suffix_span.set_end(self.pos.current().end());
785							break Ok(Meta(
786								NameOrKeyword::CompactIri(namespace, (suffix, suffix_span)),
787								self.pos.current(),
788							));
789						}
790					}
791				}
792			}
793			_ => Ok(Meta(
794				NameOrKeyword::CompactIri(namespace, (String::new(), self.pos.current())),
795				self.pos.current(),
796			)),
797		}
798	}
799
800	pub fn consume(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
801		self.skip_whitespaces()?;
802		match self.next_char()? {
803			Some('@') => Ok(self.next_langtag_or_keyword()?.map(|t| match t {
804				LanguageTagOrKeyword::LanguageTag(tag) => Some(Token::LangTag(tag)),
805				LanguageTagOrKeyword::Keyword(kw) => Some(Token::Keyword(kw)),
806			})),
807			Some('<') => Ok(self.next_iriref()?.map(|t| Some(Token::IriRef(t)))),
808			Some('"') => Ok(self
809				.next_string_literal('"')?
810				.map(|t| Some(Token::StringLiteral(t)))),
811			Some('\'') => Ok(self
812				.next_string_literal('\'')?
813				.map(|t| Some(Token::StringLiteral(t)))),
814			Some('_') => Ok(self
815				.next_blank_node_label()?
816				.map(|t| Some(Token::BlankNodeLabel(t)))),
817			Some(',') => Ok(Meta(Some(Token::Punct(Punct::Comma)), self.pos.current())),
818			Some(';') => Ok(Meta(
819				Some(Token::Punct(Punct::Semicolon)),
820				self.pos.current(),
821			)),
822			Some('^') => match self.next_char()? {
823				Some('^') => Ok(Meta(Some(Token::Punct(Punct::Carets)), self.pos.current())),
824				unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
825			},
826			Some('(') => Ok(Meta(
827				Some(Token::Begin(Delimiter::Parenthesis)),
828				self.pos.current(),
829			)),
830			Some('[') => Ok(Meta(
831				Some(Token::Begin(Delimiter::Bracket)),
832				self.pos.current(),
833			)),
834			Some(')') => Ok(Meta(
835				Some(Token::End(Delimiter::Parenthesis)),
836				self.pos.current(),
837			)),
838			Some(']') => Ok(Meta(
839				Some(Token::End(Delimiter::Bracket)),
840				self.pos.current(),
841			)),
842			Some(c @ ('+' | '-' | '0'..='9' | '.')) => {
843				Ok(self.next_numeric_or_dot(c)?.map(|t| match t {
844					NumericOrPeriod::Numeric(n) => Some(Token::Numeric(n)),
845					NumericOrPeriod::Period => Some(Token::Punct(Punct::Period)),
846				}))
847			}
848			Some(c) => Ok(self.next_name_or_keyword(c)?.map(|t| match t {
849				NameOrKeyword::Keyword(kw) => Some(Token::Keyword(kw)),
850				NameOrKeyword::CompactIri(p, s) => Some(Token::CompactIri(p, s)),
851			})),
852			None => Ok(Meta(None, self.pos.end())),
853		}
854	}
855
856	#[allow(clippy::type_complexity)]
857	pub fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Error<E>, Span>> {
858		if self.lookahead.is_none() {
859			if let locspan::Meta(Some(token), loc) = self.consume()? {
860				self.lookahead = Some(Meta::new(token, loc));
861			}
862		}
863
864		match &self.lookahead {
865			Some(locspan::Meta(token, loc)) => Ok(Meta::new(Some(token), *loc)),
866			None => Ok(Meta::new(None, self.pos.end())),
867		}
868	}
869
870	#[allow(clippy::type_complexity, clippy::should_implement_trait)]
871	pub fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
872		match self.lookahead.take() {
873			Some(locspan::Meta(token, loc)) => Ok(Meta::new(Some(token), loc)),
874			None => self.consume(),
875		}
876	}
877}
878
879impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Tokens for Lexer<C, E> {
880	type Error = Error<E>;
881
882	fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Error<E>, Span>> {
883		self.peek()
884	}
885
886	fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
887		self.next()
888	}
889
890	fn last(&self) -> Span {
891		self.pos.last_span
892	}
893}
894
895impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Iterator for Lexer<C, E> {
896	type Item = Result<Meta<Token, Span>, Meta<Error<E>, Span>>;
897
898	fn next(&mut self) -> Option<Self::Item> {
899		match self.next() {
900			Ok(Meta(Some(token), loc)) => Some(Ok(Meta::new(token, loc))),
901			Ok(Meta(None, _)) => None,
902			Err(e) => Some(Err(e)),
903		}
904	}
905}
906
/// Checks whether `c` is a base name character: an ASCII letter or a
/// character of one of the listed non-ASCII ranges.
fn is_pn_chars_base(c: char) -> bool {
	matches!(
		c,
		'A'..='Z'
			| 'a'..='z'
			| '\u{00c0}'..='\u{00d6}'
			| '\u{00d8}'..='\u{00f6}'
			| '\u{00f8}'..='\u{02ff}'
			| '\u{0370}'..='\u{037d}'
			| '\u{037f}'..='\u{1fff}'
			| '\u{200c}'..='\u{200d}'
			| '\u{2070}'..='\u{218f}'
			| '\u{2c00}'..='\u{2fef}'
			| '\u{3001}'..='\u{d7ff}'
			| '\u{f900}'..='\u{fdcf}'
			| '\u{fdf0}'..='\u{fffd}'
			| '\u{10000}'..='\u{effff}'
	)
}
910
911fn is_pn_chars_u(c: char) -> bool {
912	is_pn_chars_base(c) || c == '_'
913}
914
915fn is_pn_chars(c: char) -> bool {
916	is_pn_chars_u(c)
917		|| matches!(c, '-' | '0'..='9' | '\u{00b7}' | '\u{0300}'..='\u{036f}' | '\u{203f}'..='\u{2040}')
918}