1use crate::{BlankIdBuf, DecimalBuf, DoubleBuf, IntegerBuf, NumericLiteral};
2use decoded_char::DecodedChar;
3use iref::IriRefBuf;
4use langtag::LanguageTagBuf;
5use locspan::{Meta, Span};
6use std::fmt;
7use std::iter::Peekable;
8use std::str::FromStr;
9
/// Fallible stream of [`Token`]s with one token of lookahead.
pub trait Tokens {
    /// Error type reported when the next token cannot be produced.
    type Error;

    /// Returns a reference to the next token without consuming it, or `None`
    /// when no token is left, together with a span.
    #[allow(clippy::type_complexity)]
    fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Self::Error, Span>>;

    /// Consumes and returns the next token, or `None` when no token is left,
    /// together with a span.
    #[allow(clippy::type_complexity)]
    fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Self::Error, Span>>;

    /// Returns the span of the most recent position reached by the source.
    ///
    /// The `Lexer` implementation in this file returns the span of the last
    /// character it read.
    fn last(&self) -> Span;
}
23
/// What the lexer found where it expected something else.
#[derive(Debug, thiserror::Error)]
pub enum Unexpected {
    /// A character that is not allowed at this position.
    #[error("unexpected character `{0}`")]
    Char(char),

    /// The input ended while more characters were required.
    #[error("unexpected end of file")]
    EndOfFile,
}
33
34impl From<Option<char>> for Unexpected {
35 fn from(value: Option<char>) -> Self {
36 match value {
37 Some(c) => Self::Char(c),
38 None => Self::EndOfFile,
39 }
40 }
41}
42
43#[derive(Debug, thiserror::Error)]
45pub enum Error<E = std::convert::Infallible> {
46 #[error("invalid language tag")]
47 InvalidLangTag,
48
49 #[error("invalid character code point {0:x}")]
50 InvalidCodepoint(u32),
51
52 #[error("invalid IRI reference <{0}>: {1}")]
53 InvalidIriRef(iref::Error, String),
54
55 #[error(transparent)]
56 Unexpected(Unexpected),
57
58 #[error(transparent)]
59 Stream(E),
60}
61
/// Token produced by the [`Lexer`].
#[derive(Debug)]
pub enum Token {
    /// Keyword such as `a`, `@prefix` or `true`.
    Keyword(Keyword),
    /// Opening delimiter (`(` or `[`).
    Begin(Delimiter),
    /// Closing delimiter (`)` or `]`).
    End(Delimiter),
    /// Language tag introduced by `@`.
    LangTag(LanguageTagBuf),
    /// IRI reference written between `<` and `>`.
    IriRef(IriRefBuf),
    /// Content of a string literal, with escape sequences already resolved.
    StringLiteral(String),
    /// Blank node label, including its `_:` prefix.
    BlankNodeLabel(BlankIdBuf),
    /// Punctuation mark.
    Punct(Punct),
    /// Compact IRI: the part before `:` and the part after it, each with its span.
    CompactIri((String, Span), (String, Span)),
    /// Numeric literal.
    Numeric(NumericLiteral),
}
76
/// Human-readable description of a token, suitable for diagnostics.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Keyword(kw) => write!(f, "keyword `{kw}`"),
            Self::Begin(d) => write!(f, "opening `{}`", d.begin()),
            Self::End(d) => write!(f, "closing `{}`", d.end()),
            Self::LangTag(tag) => write!(f, "language tag `{tag}`"),
            Self::IriRef(iri_ref) => write!(f, "IRI reference <{iri_ref}>"),
            Self::StringLiteral(string) => {
                // Special characters are re-escaped so the output stays on one line.
                write!(f, "string literal \"{}\"", DisplayStringLiteral(string))
            }
            Self::BlankNodeLabel(label) => write!(f, "blank node label `{label}`"),
            // `Punct` already describes itself (e.g. "comma `,`").
            Self::Punct(p) => p.fmt(f),
            Self::CompactIri((prefix, _), (suffix, _)) => {
                write!(f, "compact IRI `{prefix}:{suffix}`")
            }
            Self::Numeric(n) => write!(f, "numeric literal `{n}`"),
        }
    }
}
97
/// Wrapper displaying a string with its special characters escaped.
///
/// `"` and `\` are written as `\u0022` and `\u005c`; line feed, carriage
/// return, tab, backspace and form feed are written as `\n`, `\r`, `\t`, `\b`
/// and `\f`. Every other character is written unchanged.
pub struct DisplayStringLiteral<'a>(pub &'a str);

impl<'a> fmt::Display for DisplayStringLiteral<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.chars().try_for_each(|c| {
            let escaped = match c {
                '"' => "\\u0022",
                '\\' => "\\u005c",
                '\n' => "\\n",
                '\r' => "\\r",
                '\t' => "\\t",
                '\u{08}' => "\\b",
                '\u{0c}' => "\\f",
                // Anything else goes through `char`'s own `Display`.
                plain => return fmt::Display::fmt(&plain, f),
            };

            f.write_str(escaped)
        })
    }
}
119
/// Keywords recognized by the lexer.
#[derive(Debug)]
pub enum Keyword {
    /// `a`
    A,
    /// `@prefix`
    Prefix,
    /// `@base`
    Base,
    /// `BASE`, matched without regard to ASCII case.
    SparqlBase,
    /// `PREFIX`, matched without regard to ASCII case.
    SparqlPrefix,
    /// `true`
    True,
    /// `false`
    False,
}
130
/// Error returned by `Keyword::from_str` when the input is not a keyword.
#[derive(Clone)]
pub struct NotAKeyword;
133
134impl FromStr for Keyword {
135 type Err = NotAKeyword;
136
137 fn from_str(s: &str) -> Result<Self, Self::Err> {
138 if s == "a" {
139 Ok(Self::A)
140 } else if s == "true" {
141 Ok(Self::True)
142 } else if s == "false" {
143 Ok(Self::False)
144 } else if s == unicase::Ascii::new("BASE") {
145 Ok(Self::SparqlBase)
146 } else if s == unicase::Ascii::new("PREFIX") {
147 Ok(Self::SparqlPrefix)
148 } else if s == "@prefix" {
149 Ok(Self::Prefix)
150 } else if s == "@base" {
151 Ok(Self::Base)
152 } else {
153 Err(NotAKeyword)
154 }
155 }
156}
157
158impl fmt::Display for Keyword {
159 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
160 match self {
161 Self::A => write!(f, "a"),
162 Self::Prefix => write!(f, "@prefix"),
163 Self::Base => write!(f, "@base"),
164 Self::SparqlBase => write!(f, "BASE"),
165 Self::SparqlPrefix => write!(f, "PREFIX"),
166 Self::True => write!(f, "true"),
167 Self::False => write!(f, "false"),
168 }
169 }
170}
171
/// Kind of paired delimiter.
#[derive(Debug)]
pub enum Delimiter {
    /// `(` and `)`.
    Parenthesis,
    /// `[` and `]`.
    Bracket,
}

impl Delimiter {
    /// Returns the `(opening, closing)` characters of this delimiter.
    fn pair(&self) -> (char, char) {
        match self {
            Self::Parenthesis => ('(', ')'),
            Self::Bracket => ('[', ']'),
        }
    }

    /// Returns the opening character of this delimiter.
    pub fn begin(&self) -> char {
        self.pair().0
    }

    /// Returns the closing character of this delimiter.
    pub fn end(&self) -> char {
        self.pair().1
    }
}
193
/// Punctuation mark.
#[derive(Debug)]
pub enum Punct {
    /// `.`
    Period,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `^^`
    Carets,
}

impl fmt::Display for Punct {
    /// Writes the name of the mark followed by the mark itself in backquotes.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = match self {
            Self::Period => "dot `.`",
            Self::Semicolon => "semicolon `;`",
            Self::Comma => "comma `,`",
            Self::Carets => "carets `^^`",
        };

        f.write_str(description)
    }
}
212
/// Position of the lexer in the source.
struct Position {
    /// Span grown by every character read and cleared once whitespace and
    /// comments have been skipped, i.e. the span of the token being read.
    span: Span,
    /// Span cleared and re-grown on every character read, i.e. the span of
    /// the last character read.
    last_span: Span,
}
218
219impl Position {
220 fn current(&self) -> Span {
221 self.span
222 }
223
224 fn end(&self) -> Span {
225 self.span.end().into()
226 }
227
228 fn last(&self) -> Span {
229 self.last_span
230 }
231}
232
/// Lexer turning a fallible stream of decoded characters into [`Token`]s.
///
/// `C` is the character stream and `E` the error it may yield.
pub struct Lexer<C: Iterator<Item = Result<DecodedChar, E>>, E> {
    /// Input characters, with one character of lookahead.
    chars: Peekable<C>,
    /// Spans of the token being read and of the last character read.
    pos: Position,
    /// Token already lexed by `peek` and not yet returned by `next`.
    lookahead: Option<Meta<Token, Span>>,
}
241
242impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
243 pub fn new(chars: C) -> Self {
244 Self {
245 chars: chars.peekable(),
246 pos: Position {
247 span: Span::default(),
248 last_span: Span::default(),
249 },
250 lookahead: None,
251 }
252 }
253}
254
/// Result of lexing what follows an `@`: either the `@prefix`/`@base`
/// keyword or a language tag.
enum LanguageTagOrKeyword {
    Keyword(Keyword),
    LanguageTag(LanguageTagBuf),
}
259
/// Result of lexing a name: either a keyword or a compact IRI made of the
/// part before `:` and the part after it, each with its span.
enum NameOrKeyword {
    Keyword(Keyword),
    CompactIri((String, Span), (String, Span)),
}
264
/// Result of lexing a token starting with a sign, a digit or `.`: either a
/// numeric literal or a lone period.
enum NumericOrPeriod {
    Numeric(NumericLiteral),
    Period,
}
269
270impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
271 fn peek_char(&mut self) -> Result<Option<char>, Meta<Error<E>, Span>> {
272 match self.chars.peek() {
273 None => Ok(None),
274 Some(Ok(c)) => Ok(Some(c.chr())),
275 Some(Err(_)) => self.next_char(),
276 }
277 }
278
279 fn next_char(&mut self) -> Result<Option<char>, Meta<Error<E>, Span>> {
280 match self.chars.next() {
281 None => Ok(None),
282 Some(Ok(c)) => {
283 self.pos.span.push(c.len());
284 self.pos.last_span.clear();
285 self.pos.last_span.push(c.len());
286 Ok(Some(c.chr()))
287 }
288 Some(Err(e)) => Err(Meta(Error::Stream(e), self.pos.end())),
289 }
290 }
291
292 fn expect_char(&mut self) -> Result<char, Meta<Error<E>, Span>> {
293 self.next_char()?
294 .ok_or_else(|| Meta(Error::Unexpected(Unexpected::EndOfFile), self.pos.end()))
295 }
296
297 fn skip_whitespaces(&mut self) -> Result<(), Meta<Error<E>, Span>> {
298 while let Some(c) = self.peek_char()? {
299 if c.is_whitespace() {
300 self.next_char()?;
301 } else if c == '#' {
302 self.next_comment()?;
303 } else {
304 break;
305 }
306 }
307
308 self.pos.span.clear();
309 Ok(())
310 }
311
312 fn next_comment(&mut self) -> Result<(), Meta<Error<E>, Span>> {
319 loop {
320 if matches!(self.next_char()?, None | Some('\n')) {
321 break Ok(());
322 }
323 }
324 }
325
326 fn next_langtag_or_keyword(
328 &mut self,
329 ) -> Result<Meta<LanguageTagOrKeyword, Span>, Meta<Error<E>, Span>> {
330 let mut tag = String::new();
331
332 loop {
333 match self.peek_char()? {
334 None => {
335 if tag.is_empty() {
336 return Err(Meta(Error::InvalidLangTag, self.pos.current()));
337 } else {
338 break;
339 }
340 }
341 Some(c) => {
342 if c.is_ascii_alphabetic() {
343 tag.push(self.expect_char()?);
344 } else if tag.is_empty() {
345 return Err(Meta(Error::InvalidLangTag, self.pos.current()));
346 } else {
347 break;
348 }
349 }
350 }
351 }
352
353 let mut empty_subtag = true;
354 if let Some('-') = self.peek_char()? {
355 tag.push(self.expect_char()?);
356 loop {
357 match self.peek_char()? {
358 Some('-') if !empty_subtag => tag.push(self.expect_char()?),
359 Some(c) if c.is_ascii_alphanumeric() => {
360 empty_subtag = false;
361 tag.push(self.expect_char()?)
362 }
363 Some(c) => {
364 if c.is_whitespace() {
365 if empty_subtag {
366 return Err(Meta(Error::InvalidLangTag, self.pos.current()));
367 } else {
368 break;
369 }
370 } else {
371 self.next_char()?;
372 return Err(Meta(
373 Error::Unexpected(Unexpected::Char(c)),
374 self.pos.last(),
375 ));
376 }
377 }
378 None => {
379 if empty_subtag {
380 return Err(Meta(Error::InvalidLangTag, self.pos.current()));
381 } else {
382 break;
383 }
384 }
385 }
386 }
387 }
388
389 match tag.as_str() {
390 "prefix" => Ok(Meta(
391 LanguageTagOrKeyword::Keyword(Keyword::Prefix),
392 self.pos.current(),
393 )),
394 "base" => Ok(Meta(
395 LanguageTagOrKeyword::Keyword(Keyword::Base),
396 self.pos.current(),
397 )),
398 _ => match LanguageTagBuf::new(tag.into_bytes()) {
399 Ok(tag) => Ok(Meta(
400 LanguageTagOrKeyword::LanguageTag(tag),
401 self.pos.current(),
402 )),
403 Err(_) => Err(Meta(Error::InvalidLangTag, self.pos.current())),
404 },
405 }
406 }
407
408 fn next_iriref(&mut self) -> Result<Meta<IriRefBuf, Span>, Meta<Error<E>, Span>> {
411 let mut iriref = String::new();
412
413 loop {
414 match self.next_char()? {
415 Some('>') => break,
416 Some('\\') => {
417 let span = self.pos.last();
418 let c = match self.next_char()? {
419 Some('u') => self.next_hex_char(span, 4)?,
420 Some('U') => self.next_hex_char(span, 8)?,
421 unexpected => {
422 return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
423 }
424 };
425
426 iriref.push(c)
427 }
428 Some(c) => {
429 if matches!(
430 c,
431 '\u{00}'..='\u{20}' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\'
432 ) {
433 return Err(Meta(
434 Error::Unexpected(Unexpected::Char(c)),
435 self.pos.last(),
436 ));
437 }
438
439 iriref.push(c)
440 }
441 None => {
442 return Err(Meta(
443 Error::Unexpected(Unexpected::EndOfFile),
444 self.pos.end(),
445 ))
446 }
447 }
448 }
449
450 match IriRefBuf::from_string(iriref) {
451 Ok(iriref) => Ok(Meta(iriref, self.pos.current())),
452 Err((e, string)) => Err(Meta(Error::InvalidIriRef(e, string), self.pos.current())),
453 }
454 }
455
456 fn next_hex_char(&mut self, mut span: Span, len: u8) -> Result<char, Meta<Error<E>, Span>> {
457 let mut codepoint = 0;
458
459 for _ in 0..len {
460 let c = self.expect_char()?;
461 match c.to_digit(16) {
462 Some(d) => codepoint = codepoint << 4 | d,
463 None => {
464 return Err(Meta(
465 Error::Unexpected(Unexpected::Char(c)),
466 self.pos.last(),
467 ))
468 }
469 }
470 }
471
472 span.set_end(self.pos.current().end());
473 match char::try_from(codepoint) {
474 Ok(c) => Ok(c),
475 Err(_) => Err(Meta(Error::InvalidCodepoint(codepoint), span)),
476 }
477 }
478
    /// Lexes a string literal, after its first `delimiter` character.
    ///
    /// A literal that starts with three delimiters is a long string and ends
    /// at the next run of three delimiters; otherwise it ends at the next
    /// delimiter. Escape sequences are resolved in the returned string.
    fn next_string_literal(
        &mut self,
        delimiter: char,
    ) -> Result<Meta<String, Span>, Meta<Error<E>, Span>> {
        let mut string = String::new();

        // Set once the opening delimiter turns out to be tripled.
        let mut long = false;

        loop {
            match self.next_char()? {
                Some(c) if c == delimiter => {
                    if !long {
                        // A second delimiter right after the opening one is
                        // taken as the start of a long string: consume one
                        // more delimiter and switch mode.
                        if string.is_empty() && self.peek_char()? == Some(delimiter) {
                            self.next_char()?;
                            long = true;
                        } else {
                            break;
                        }
                    } else if self.peek_char()? == Some(delimiter) {
                        self.next_char()?;
                        if self.peek_char()? == Some(delimiter) {
                            // Three delimiters in a row close a long string.
                            self.next_char()?;
                            break;
                        } else {
                            // Only two delimiters: they belong to the content.
                            string.push(delimiter);
                            string.push(delimiter);
                        }
                    } else {
                        // A single delimiter inside a long string is content.
                        string.push(delimiter);
                    }
                }
                Some('\\') => {
                    // Span of the backslash, extended over the escape by
                    // `next_hex_char`.
                    let span = self.pos.last();
                    let c = match self.next_char()? {
                        Some('u') => self.next_hex_char(span, 4)?,
                        Some('U') => self.next_hex_char(span, 8)?,
                        Some('t') => '\t',
                        Some('b') => '\u{08}',
                        Some('n') => '\n',
                        Some('r') => '\r',
                        Some('f') => '\u{0c}',
                        Some('\'') => '\'',
                        Some('"') => '"',
                        Some('\\') => '\\',
                        unexpected => {
                            return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
                        }
                    };

                    string.push(c)
                }
                Some(c) => {
                    // NOTE(review): raw line breaks are accepted here even in
                    // short strings — confirm this is intended.
                    string.push(c)
                }
                None => {
                    return Err(Meta(
                        Error::Unexpected(Unexpected::EndOfFile),
                        self.pos.end(),
                    ))
                }
            }
        }

        Ok(Meta(string, self.pos.current()))
    }
549
550 fn next_numeric_or_dot(
553 &mut self,
554 first: char,
555 ) -> Result<Meta<NumericOrPeriod, Span>, Meta<Error<E>, Span>> {
556 let mut buffer: String = first.into();
557
558 enum State {
559 NonEmptyInteger,
560 Integer,
561 NonENonEmptyDecimal,
562 NonEmptyDecimal,
563 Decimal,
564 ExponentSign,
565 NonEmptyExponent,
566 Exponent,
567 }
568
569 let mut state = match first {
570 '+' => State::NonEmptyInteger,
571 '-' => State::NonEmptyInteger,
572 '.' => State::NonENonEmptyDecimal,
573 '0'..='9' => State::Integer,
574 _ => panic!("invalid first numeric character"),
575 };
576
577 loop {
578 state = match state {
579 State::NonEmptyInteger => match self.peek_char()? {
580 Some('0'..='9') => State::Integer,
581 Some('.') => State::NonEmptyDecimal,
582 unexpected => {
583 return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
584 }
585 },
586 State::Integer => match self.peek_char()? {
587 Some('0'..='9') => State::Integer,
588 Some('.') => State::NonEmptyDecimal,
589 Some('e' | 'E') => State::ExponentSign,
590 _ => break,
591 },
592 State::NonENonEmptyDecimal => match self.peek_char()? {
593 Some('0'..='9') => State::Decimal,
594 _ => return Ok(Meta(NumericOrPeriod::Period, self.pos.current())),
595 },
596 State::NonEmptyDecimal => match self.peek_char()? {
597 Some('0'..='9') => State::Decimal,
598 Some('e' | 'E') => State::ExponentSign,
599 unexpected => {
600 return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
601 }
602 },
603 State::Decimal => match self.peek_char()? {
604 Some('0'..='9') => State::Decimal,
605 Some('e' | 'E') => State::ExponentSign,
606 _ => break,
607 },
608 State::ExponentSign => match self.peek_char()? {
609 Some('+' | '-') => State::NonEmptyExponent,
610 Some('0'..='9') => State::Exponent,
611 unexpected => {
612 return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
613 }
614 },
615 State::NonEmptyExponent => match self.peek_char()? {
616 Some('0'..='9') => State::Exponent,
617 unexpected => {
618 return Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last()))
619 }
620 },
621 State::Exponent => match self.peek_char()? {
622 Some('0'..='9') => State::Exponent,
623 _ => break,
624 },
625 };
626
627 buffer.push(self.expect_char()?);
628 }
629
630 let n = match state {
631 State::Integer => NumericLiteral::Integer(unsafe { IntegerBuf::new_unchecked(buffer) }),
632 State::Decimal => NumericLiteral::Decimal(unsafe { DecimalBuf::new_unchecked(buffer) }),
633 State::Exponent => NumericLiteral::Double(unsafe { DoubleBuf::new_unchecked(buffer) }),
634 _ => unreachable!(),
635 };
636
637 Ok(Meta(NumericOrPeriod::Numeric(n), self.pos.current()))
638 }
639
640 fn next_blank_node_label(&mut self) -> Result<Meta<BlankIdBuf, Span>, Meta<Error<E>, Span>> {
642 match self.next_char()? {
643 Some(':') => {
644 let mut label = String::new();
645 label.push('_');
646 label.push(':');
647 match self.next_char()? {
648 Some(c) if c.is_ascii_digit() || is_pn_chars_u(c) => {
649 label.push(c);
650 let mut last_is_pn_chars = true;
651 loop {
652 match self.peek_char()? {
653 Some(c) if is_pn_chars(c) => {
654 label.push(self.expect_char()?);
655 last_is_pn_chars = true
656 }
657 Some('.') => {
658 label.push(self.expect_char()?);
659 last_is_pn_chars = false;
660 }
661 _ if last_is_pn_chars => break,
662 unexpected => {
663 return Err(Meta(
664 Error::Unexpected(unexpected.into()),
665 self.pos.last(),
666 ))
667 }
668 }
669 }
670
671 Ok(Meta(
672 unsafe { BlankIdBuf::new_unchecked(label) },
673 self.pos.current(),
674 ))
675 }
676 unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
677 }
678 }
679 unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
680 }
681 }
682
683 fn next_escape(&mut self) -> Result<char, Meta<Error<E>, Span>> {
684 match self.next_char()? {
685 Some(
686 c @ ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
687 | ';' | '=' | '/' | '?' | '#' | '@' | '%'),
688 ) => Ok(c),
689 unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
690 }
691 }
692
693 fn next_name_or_keyword(
694 &mut self,
695 c: char,
696 ) -> Result<Meta<NameOrKeyword, Span>, Meta<Error<E>, Span>> {
697 let namespace = match c {
699 ':' => (String::new(), self.pos.current()),
700 c if is_pn_chars_base(c) => {
701 let mut namespace = String::new();
702 namespace.push(c);
703 let mut last_is_pn_chars = true;
704 let span = loop {
705 match self.peek_char()? {
706 Some(c) if is_pn_chars(c) => {
707 namespace.push(self.expect_char()?);
708 last_is_pn_chars = true
709 }
710 Some('.') => {
711 namespace.push(self.expect_char()?);
712 last_is_pn_chars = false;
713 }
714 Some(':') if last_is_pn_chars => {
715 let span = self.pos.current();
716 self.expect_char()?;
717 break span;
718 }
719 unexpected => {
720 return if unexpected.map(|c| c.is_whitespace()).unwrap_or(true) {
721 match Keyword::from_str(&namespace) {
722 Ok(kw) => {
723 Ok(Meta(NameOrKeyword::Keyword(kw), self.pos.current()))
724 }
725 Err(NotAKeyword) => break self.pos.current(),
726 }
727 } else {
728 Err(Meta(Error::Unexpected(unexpected.into()), self.pos.end()))
729 }
730 }
731 }
732 };
733
734 (namespace, span)
735 }
736 unexpected => {
737 return Err(Meta(
738 Error::Unexpected(Unexpected::Char(unexpected)),
739 self.pos.last(),
740 ))
741 }
742 };
743
744 let mut suffix = String::new();
746 let mut suffix_span = self.pos.current().next();
747 match self.peek_char()? {
748 Some(c) if is_pn_chars_u(c) || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') => {
749 let c = match self.expect_char()? {
750 '%' => {
751 self.next_hex_char(self.pos.current().end().into(), 2)?
753 }
754 '\\' => {
755 self.next_escape()?
757 }
758 c => c,
759 };
760
761 suffix.push(c);
762
763 loop {
764 match self.peek_char()? {
765 Some(c)
766 if is_pn_chars(c)
767 || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') =>
768 {
769 let c = match self.expect_char()? {
770 '%' => {
771 self.next_hex_char(self.pos.current().end().into(), 2)?
773 }
774 '\\' => {
775 self.next_escape()?
777 }
778 c => c,
779 };
780
781 suffix.push(c);
782 }
783 _ => {
784 suffix_span.set_end(self.pos.current().end());
785 break Ok(Meta(
786 NameOrKeyword::CompactIri(namespace, (suffix, suffix_span)),
787 self.pos.current(),
788 ));
789 }
790 }
791 }
792 }
793 _ => Ok(Meta(
794 NameOrKeyword::CompactIri(namespace, (String::new(), self.pos.current())),
795 self.pos.current(),
796 )),
797 }
798 }
799
800 pub fn consume(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
801 self.skip_whitespaces()?;
802 match self.next_char()? {
803 Some('@') => Ok(self.next_langtag_or_keyword()?.map(|t| match t {
804 LanguageTagOrKeyword::LanguageTag(tag) => Some(Token::LangTag(tag)),
805 LanguageTagOrKeyword::Keyword(kw) => Some(Token::Keyword(kw)),
806 })),
807 Some('<') => Ok(self.next_iriref()?.map(|t| Some(Token::IriRef(t)))),
808 Some('"') => Ok(self
809 .next_string_literal('"')?
810 .map(|t| Some(Token::StringLiteral(t)))),
811 Some('\'') => Ok(self
812 .next_string_literal('\'')?
813 .map(|t| Some(Token::StringLiteral(t)))),
814 Some('_') => Ok(self
815 .next_blank_node_label()?
816 .map(|t| Some(Token::BlankNodeLabel(t)))),
817 Some(',') => Ok(Meta(Some(Token::Punct(Punct::Comma)), self.pos.current())),
818 Some(';') => Ok(Meta(
819 Some(Token::Punct(Punct::Semicolon)),
820 self.pos.current(),
821 )),
822 Some('^') => match self.next_char()? {
823 Some('^') => Ok(Meta(Some(Token::Punct(Punct::Carets)), self.pos.current())),
824 unexpected => Err(Meta(Error::Unexpected(unexpected.into()), self.pos.last())),
825 },
826 Some('(') => Ok(Meta(
827 Some(Token::Begin(Delimiter::Parenthesis)),
828 self.pos.current(),
829 )),
830 Some('[') => Ok(Meta(
831 Some(Token::Begin(Delimiter::Bracket)),
832 self.pos.current(),
833 )),
834 Some(')') => Ok(Meta(
835 Some(Token::End(Delimiter::Parenthesis)),
836 self.pos.current(),
837 )),
838 Some(']') => Ok(Meta(
839 Some(Token::End(Delimiter::Bracket)),
840 self.pos.current(),
841 )),
842 Some(c @ ('+' | '-' | '0'..='9' | '.')) => {
843 Ok(self.next_numeric_or_dot(c)?.map(|t| match t {
844 NumericOrPeriod::Numeric(n) => Some(Token::Numeric(n)),
845 NumericOrPeriod::Period => Some(Token::Punct(Punct::Period)),
846 }))
847 }
848 Some(c) => Ok(self.next_name_or_keyword(c)?.map(|t| match t {
849 NameOrKeyword::Keyword(kw) => Some(Token::Keyword(kw)),
850 NameOrKeyword::CompactIri(p, s) => Some(Token::CompactIri(p, s)),
851 })),
852 None => Ok(Meta(None, self.pos.end())),
853 }
854 }
855
856 #[allow(clippy::type_complexity)]
857 pub fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Error<E>, Span>> {
858 if self.lookahead.is_none() {
859 if let locspan::Meta(Some(token), loc) = self.consume()? {
860 self.lookahead = Some(Meta::new(token, loc));
861 }
862 }
863
864 match &self.lookahead {
865 Some(locspan::Meta(token, loc)) => Ok(Meta::new(Some(token), *loc)),
866 None => Ok(Meta::new(None, self.pos.end())),
867 }
868 }
869
870 #[allow(clippy::type_complexity, clippy::should_implement_trait)]
871 pub fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
872 match self.lookahead.take() {
873 Some(locspan::Meta(token, loc)) => Ok(Meta::new(Some(token), loc)),
874 None => self.consume(),
875 }
876 }
877}
878
/// [`Tokens`] implementation delegating to the inherent methods of [`Lexer`].
impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Tokens for Lexer<C, E> {
    type Error = Error<E>;

    fn peek(&mut self) -> Result<Meta<Option<&Token>, Span>, Meta<Error<E>, Span>> {
        // Inherent methods take precedence over trait methods in method
        // resolution, so this calls `Lexer::peek` and does not recurse.
        self.peek()
    }

    fn next(&mut self) -> Result<Meta<Option<Token>, Span>, Meta<Error<E>, Span>> {
        // Resolves to the inherent `Lexer::next`.
        self.next()
    }

    /// Returns the span of the last character read by the lexer.
    fn last(&self) -> Span {
        self.pos.last_span
    }
}
894
895impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Iterator for Lexer<C, E> {
896 type Item = Result<Meta<Token, Span>, Meta<Error<E>, Span>>;
897
898 fn next(&mut self) -> Option<Self::Item> {
899 match self.next() {
900 Ok(Meta(Some(token), loc)) => Some(Ok(Meta::new(token, loc))),
901 Ok(Meta(None, _)) => None,
902 Err(e) => Some(Err(e)),
903 }
904 }
905}
906
/// Returns `true` when `c` is an ASCII letter or lies in one of the
/// non-ASCII code point ranges listed below.
fn is_pn_chars_base(c: char) -> bool {
    c.is_ascii_alphabetic()
        || matches!(
            u32::from(c),
            0x00c0..=0x00d6
                | 0x00d8..=0x00f6
                | 0x00f8..=0x02ff
                | 0x0370..=0x037d
                | 0x037f..=0x1fff
                | 0x200c..=0x200d
                | 0x2070..=0x218f
                | 0x2c00..=0x2fef
                | 0x3001..=0xd7ff
                | 0xf900..=0xfdcf
                | 0xfdf0..=0xfffd
                | 0x10000..=0xeffff
        )
}
910
911fn is_pn_chars_u(c: char) -> bool {
912 is_pn_chars_base(c) || c == '_'
913}
914
915fn is_pn_chars(c: char) -> bool {
916 is_pn_chars_u(c)
917 || matches!(c, '-' | '0'..='9' | '\u{00b7}' | '\u{0300}'..='\u{036f}' | '\u{203f}'..='\u{2040}')
918}