1use crate::{BlankIdBuf, DecimalBuf, DoubleBuf, IntegerBuf, NumericLiteral};
2use decoded_char::DecodedChar;
3use iref::IriRefBuf;
4use langtag::LangTagBuf;
5use locspan::Span;
6use std::fmt;
7use std::iter::Peekable;
8use std::str::FromStr;
9
10pub trait Tokens {
12 type Error;
13
14 #[allow(clippy::type_complexity)]
15 fn peek(&mut self) -> Result<(Option<&Token>, Span), (Self::Error, Span)>;
16
17 #[allow(clippy::type_complexity)]
18 fn next(&mut self) -> Result<(Option<Token>, Span), (Self::Error, Span)>;
19
20 fn last(&self) -> Span;
22}
23
24#[derive(Debug, thiserror::Error)]
26pub enum Unexpected {
27 #[error("unexpected character `{0}`")]
28 Char(char),
29
30 #[error("unexpected end of file")]
31 EndOfFile,
32}
33
34impl From<Option<char>> for Unexpected {
35 fn from(value: Option<char>) -> Self {
36 match value {
37 Some(c) => Self::Char(c),
38 None => Self::EndOfFile,
39 }
40 }
41}
42
43#[derive(Debug, thiserror::Error)]
45pub enum Error<E = std::convert::Infallible> {
46 #[error("invalid language tag")]
47 InvalidLangTag,
48
49 #[error("invalid character code point {0:x}")]
50 InvalidCodepoint(u32),
51
52 #[error("invalid IRI reference <{0}>")]
53 InvalidIriRef(iref::iri::InvalidIriRef<String>),
54
55 #[error(transparent)]
56 Unexpected(Unexpected),
57
58 #[error(transparent)]
59 Stream(E),
60}
61
62#[derive(Debug)]
64pub enum Token {
65 Keyword(Keyword),
66 Begin(Delimiter),
67 End(Delimiter),
68 LangTag(LangTagBuf),
69 IriRef(IriRefBuf),
70 StringLiteral(String),
71 BlankNodeLabel(BlankIdBuf),
72 Punct(Punct),
73 CompactIri((String, Span), (String, Span)),
74 Numeric(NumericLiteral),
75}
76
77impl fmt::Display for Token {
78 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
79 match self {
80 Self::Keyword(kw) => write!(f, "keyword `{kw}`"),
81 Self::Begin(d) => write!(f, "opening `{}`", d.begin()),
82 Self::End(d) => write!(f, "closing `{}`", d.end()),
83 Self::LangTag(tag) => write!(f, "language tag `{tag}`"),
84 Self::IriRef(iri_ref) => write!(f, "IRI reference <{iri_ref}>"),
85 Self::StringLiteral(string) => {
86 write!(f, "string literal \"{}\"", DisplayStringLiteral(string))
87 }
88 Self::BlankNodeLabel(label) => write!(f, "blank node label `{label}`"),
89 Self::Punct(p) => p.fmt(f),
90 Self::CompactIri((prefix, _), (suffix, _)) => {
91 write!(f, "compact IRI `{prefix}:{suffix}`")
92 }
93 Self::Numeric(n) => write!(f, "numeric literal `{n}`"),
94 }
95 }
96}
97
/// Wrapper that displays a string with the characters that cannot appear
/// verbatim in a quoted literal replaced by escape sequences.
///
/// `"` and `\` are written as `\u0022` and `\u005c`; line feed, carriage
/// return, tab, backspace and form feed are written as `\n`, `\r`, `\t`,
/// `\b` and `\f`. The surrounding quotes are not emitted.
pub struct DisplayStringLiteral<'a>(pub &'a str);

impl fmt::Display for DisplayStringLiteral<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.chars().try_for_each(|c| {
            let escape = match c {
                '"' => Some("\\u0022"),
                '\\' => Some("\\u005c"),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                '\u{08}' => Some("\\b"),
                '\u{0c}' => Some("\\f"),
                _ => None,
            };

            match escape {
                Some(sequence) => f.write_str(sequence),
                // Any other character is displayed as is.
                None => fmt::Display::fmt(&c, f),
            }
        })
    }
}
119
/// Reserved words recognized by the lexer.
///
/// The enum is a plain value type: it can be copied, compared and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Keyword {
    /// `a`
    A,

    /// `@prefix`
    Prefix,

    /// `@base`
    Base,

    /// `BASE`, matched ASCII case-insensitively when parsed.
    SparqlBase,

    /// `PREFIX`, matched ASCII case-insensitively when parsed.
    SparqlPrefix,

    /// `true`
    True,

    /// `false`
    False,
}
130
/// Error returned when a string is not one of the known [`Keyword`]s.
///
/// It implements `Debug` so that the `Result` returned by
/// `Keyword::from_str` can be used with `unwrap`/`expect` and in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NotAKeyword;
133
134impl FromStr for Keyword {
135 type Err = NotAKeyword;
136
137 fn from_str(s: &str) -> Result<Self, Self::Err> {
138 if s == "a" {
139 Ok(Self::A)
140 } else if s == "true" {
141 Ok(Self::True)
142 } else if s == "false" {
143 Ok(Self::False)
144 } else if s == unicase::Ascii::new("BASE") {
145 Ok(Self::SparqlBase)
146 } else if s == unicase::Ascii::new("PREFIX") {
147 Ok(Self::SparqlPrefix)
148 } else if s == "@prefix" {
149 Ok(Self::Prefix)
150 } else if s == "@base" {
151 Ok(Self::Base)
152 } else {
153 Err(NotAKeyword)
154 }
155 }
156}
157
158impl fmt::Display for Keyword {
159 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
160 match self {
161 Self::A => write!(f, "a"),
162 Self::Prefix => write!(f, "@prefix"),
163 Self::Base => write!(f, "@base"),
164 Self::SparqlBase => write!(f, "BASE"),
165 Self::SparqlPrefix => write!(f, "PREFIX"),
166 Self::True => write!(f, "true"),
167 Self::False => write!(f, "false"),
168 }
169 }
170}
171
/// Kind of paired delimiter.
///
/// The enum is a plain value type: it can be copied, compared and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Delimiter {
    /// `(` ... `)`
    Parenthesis,

    /// `[` ... `]`
    Bracket,
}

impl Delimiter {
    /// Returns the opening character of this delimiter.
    pub fn begin(&self) -> char {
        match self {
            Self::Parenthesis => '(',
            Self::Bracket => '[',
        }
    }

    /// Returns the closing character of this delimiter.
    pub fn end(&self) -> char {
        match self {
            Self::Parenthesis => ')',
            Self::Bracket => ']',
        }
    }
}
193
/// Punctuation mark.
///
/// The enum is a plain value type: it can be copied, compared and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Punct {
    /// `.`
    Period,

    /// `;`
    Semicolon,

    /// `,`
    Comma,

    /// `^^`
    Carets,
}

/// Describes the punctuation mark by name and symbol, for diagnostics.
impl fmt::Display for Punct {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = match self {
            Self::Period => "dot `.`",
            Self::Semicolon => "semicolon `;`",
            Self::Comma => "comma `,`",
            Self::Carets => "carets `^^`",
        };

        f.write_str(description)
    }
}
212
213struct Position {
215 span: Span,
216 last_span: Span,
217}
218
219impl Position {
220 fn current(&self) -> Span {
221 self.span
222 }
223
224 fn end(&self) -> Span {
225 self.span.end.into()
226 }
227
228 fn last(&self) -> Span {
229 self.last_span
230 }
231}
232
233pub struct Lexer<C: Iterator<Item = Result<DecodedChar, E>>, E> {
237 chars: Peekable<C>,
238 pos: Position,
239 lookahead: Option<(Token, Span)>,
240}
241
242impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
243 pub fn new(chars: C) -> Self {
244 Self {
245 chars: chars.peekable(),
246 pos: Position {
247 span: Span::default(),
248 last_span: Span::default(),
249 },
250 lookahead: None,
251 }
252 }
253}
254
255enum LanguageTagOrKeyword {
256 Keyword(Keyword),
257 LanguageTag(LangTagBuf),
258}
259
260enum NameOrKeyword {
261 Keyword(Keyword),
262 CompactIri((String, Span), (String, Span)),
263}
264
265enum NumericOrPeriod {
266 Numeric(NumericLiteral),
267 Period,
268}
269
270impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Lexer<C, E> {
271 fn peek_char(&mut self) -> Result<Option<char>, (Error<E>, Span)> {
272 match self.chars.peek() {
273 None => Ok(None),
274 Some(Ok(c)) => Ok(Some(c.chr())),
275 Some(Err(_)) => self.next_char(),
276 }
277 }
278
279 fn next_char(&mut self) -> Result<Option<char>, (Error<E>, Span)> {
280 match self.chars.next() {
281 None => Ok(None),
282 Some(Ok(c)) => {
283 self.pos.span.push(c.len());
284 self.pos.last_span.clear();
285 self.pos.last_span.push(c.len());
286 Ok(Some(c.chr()))
287 }
288 Some(Err(e)) => Err((Error::Stream(e), self.pos.end())),
289 }
290 }
291
292 fn expect_char(&mut self) -> Result<char, (Error<E>, Span)> {
293 self.next_char()?
294 .ok_or_else(|| (Error::Unexpected(Unexpected::EndOfFile), self.pos.end()))
295 }
296
297 fn skip_whitespaces(&mut self) -> Result<(), (Error<E>, Span)> {
298 while let Some(c) = self.peek_char()? {
299 if c.is_whitespace() {
300 self.next_char()?;
301 } else if c == '#' {
302 self.next_comment()?;
303 } else {
304 break;
305 }
306 }
307
308 self.pos.span.clear();
309 Ok(())
310 }
311
312 fn next_comment(&mut self) -> Result<(), (Error<E>, Span)> {
319 loop {
320 if matches!(self.next_char()?, None | Some('\n')) {
321 break Ok(());
322 }
323 }
324 }
325
326 fn next_langtag_or_keyword(
328 &mut self,
329 ) -> Result<(LanguageTagOrKeyword, Span), (Error<E>, Span)> {
330 let mut tag = String::new();
331
332 loop {
333 match self.peek_char()? {
334 None => {
335 if tag.is_empty() {
336 return Err((Error::InvalidLangTag, self.pos.current()));
337 } else {
338 break;
339 }
340 }
341 Some(c) => {
342 if c.is_ascii_alphabetic() {
343 tag.push(self.expect_char()?);
344 } else if tag.is_empty() {
345 return Err((Error::InvalidLangTag, self.pos.current()));
346 } else {
347 break;
348 }
349 }
350 }
351 }
352
353 let mut empty_subtag = true;
354 if let Some('-') = self.peek_char()? {
355 tag.push(self.expect_char()?);
356 loop {
357 match self.peek_char()? {
358 Some('-') if !empty_subtag => tag.push(self.expect_char()?),
359 Some(c) if c.is_ascii_alphanumeric() => {
360 empty_subtag = false;
361 tag.push(self.expect_char()?)
362 }
363 Some(c) => {
364 if c.is_whitespace() {
365 if empty_subtag {
366 return Err((Error::InvalidLangTag, self.pos.current()));
367 } else {
368 break;
369 }
370 } else {
371 self.next_char()?;
372 return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
373 }
374 }
375 None => {
376 if empty_subtag {
377 return Err((Error::InvalidLangTag, self.pos.current()));
378 } else {
379 break;
380 }
381 }
382 }
383 }
384 }
385
386 match tag.as_str() {
387 "prefix" => Ok((
388 LanguageTagOrKeyword::Keyword(Keyword::Prefix),
389 self.pos.current(),
390 )),
391 "base" => Ok((
392 LanguageTagOrKeyword::Keyword(Keyword::Base),
393 self.pos.current(),
394 )),
395 _ => match LangTagBuf::new(tag) {
396 Ok(tag) => Ok((LanguageTagOrKeyword::LanguageTag(tag), self.pos.current())),
397 Err(_) => Err((Error::InvalidLangTag, self.pos.current())),
398 },
399 }
400 }
401
402 fn next_iriref(&mut self) -> Result<(IriRefBuf, Span), (Error<E>, Span)> {
405 let mut iriref = String::new();
406
407 loop {
408 match self.next_char()? {
409 Some('>') => break,
410 Some('\\') => {
411 let span = self.pos.last();
412 let c = match self.next_char()? {
413 Some('u') => self.next_hex_char(span, 4)?,
414 Some('U') => self.next_hex_char(span, 8)?,
415 unexpected => {
416 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
417 }
418 };
419
420 iriref.push(c)
421 }
422 Some(c) => {
423 if matches!(
424 c,
425 '\u{00}'..='\u{20}' | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\'
426 ) {
427 return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last()));
428 }
429
430 iriref.push(c)
431 }
432 None => return Err((Error::Unexpected(Unexpected::EndOfFile), self.pos.end())),
433 }
434 }
435
436 match IriRefBuf::new(iriref) {
437 Ok(iriref) => Ok((iriref, self.pos.current())),
438 Err(e) => Err((Error::InvalidIriRef(e), self.pos.current())),
440 }
441 }
442
443 fn next_hex_char(&mut self, mut span: Span, len: u8) -> Result<char, (Error<E>, Span)> {
444 let mut codepoint = 0;
445
446 for _ in 0..len {
447 let c = self.expect_char()?;
448 match c.to_digit(16) {
449 Some(d) => codepoint = (codepoint << 4) | d,
450 None => return Err((Error::Unexpected(Unexpected::Char(c)), self.pos.last())),
451 }
452 }
453
454 span.end = self.pos.current().end;
456 match char::try_from(codepoint) {
457 Ok(c) => Ok(c),
458 Err(_) => Err((Error::InvalidCodepoint(codepoint), span)),
459 }
460 }
461
462 fn next_string_literal(&mut self, delimiter: char) -> Result<(String, Span), (Error<E>, Span)> {
465 let mut string = String::new();
466
467 let mut long = false;
468
469 loop {
470 match self.next_char()? {
471 Some(c) if c == delimiter => {
472 if !long {
473 if string.is_empty() && self.peek_char()? == Some(delimiter) {
474 self.next_char()?;
475 long = true;
476 } else {
477 break;
478 }
479 } else if self.peek_char()? == Some(delimiter) {
480 self.next_char()?;
481 if self.peek_char()? == Some(delimiter) {
482 self.next_char()?;
483 break;
484 } else {
485 string.push(delimiter);
486 string.push(delimiter);
487 }
488 } else {
489 string.push(delimiter);
490 }
491 }
492 Some('\\') => {
493 let span = self.pos.last();
494 let c = match self.next_char()? {
495 Some('u') => self.next_hex_char(span, 4)?,
496 Some('U') => self.next_hex_char(span, 8)?,
497 Some('t') => '\t',
498 Some('b') => '\u{08}',
499 Some('n') => '\n',
500 Some('r') => '\r',
501 Some('f') => '\u{0c}',
502 Some('\'') => '\'',
503 Some('"') => '"',
504 Some('\\') => '\\',
505 unexpected => {
506 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
507 }
508 };
509
510 string.push(c)
511 }
512 Some(c) => {
513 string.push(c)
517 }
518 None => return Err((Error::Unexpected(Unexpected::EndOfFile), self.pos.end())),
519 }
520 }
521
522 Ok((string, self.pos.current()))
523 }
524
525 fn next_numeric_or_dot(
528 &mut self,
529 first: char,
530 ) -> Result<(NumericOrPeriod, Span), (Error<E>, Span)> {
531 let mut buffer: String = first.into();
532
533 enum State {
534 NonEmptyInteger,
535 Integer,
536 NonENonEmptyDecimal,
537 NonEmptyDecimal,
538 Decimal,
539 ExponentSign,
540 NonEmptyExponent,
541 Exponent,
542 }
543
544 let mut state = match first {
545 '+' => State::NonEmptyInteger,
546 '-' => State::NonEmptyInteger,
547 '.' => State::NonENonEmptyDecimal,
548 '0'..='9' => State::Integer,
549 _ => panic!("invalid first numeric character"),
550 };
551
552 loop {
553 state = match state {
554 State::NonEmptyInteger => match self.peek_char()? {
555 Some('0'..='9') => State::Integer,
556 Some('.') => State::NonEmptyDecimal,
557 unexpected => {
558 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
559 }
560 },
561 State::Integer => match self.peek_char()? {
562 Some('0'..='9') => State::Integer,
563 Some('.') => State::NonEmptyDecimal,
564 Some('e' | 'E') => State::ExponentSign,
565 _ => break,
566 },
567 State::NonENonEmptyDecimal => match self.peek_char()? {
568 Some('0'..='9') => State::Decimal,
569 _ => return Ok((NumericOrPeriod::Period, self.pos.current())),
570 },
571 State::NonEmptyDecimal => match self.peek_char()? {
572 Some('0'..='9') => State::Decimal,
573 Some('e' | 'E') => State::ExponentSign,
574 unexpected => {
575 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
576 }
577 },
578 State::Decimal => match self.peek_char()? {
579 Some('0'..='9') => State::Decimal,
580 Some('e' | 'E') => State::ExponentSign,
581 _ => break,
582 },
583 State::ExponentSign => match self.peek_char()? {
584 Some('+' | '-') => State::NonEmptyExponent,
585 Some('0'..='9') => State::Exponent,
586 unexpected => {
587 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
588 }
589 },
590 State::NonEmptyExponent => match self.peek_char()? {
591 Some('0'..='9') => State::Exponent,
592 unexpected => {
593 return Err((Error::Unexpected(unexpected.into()), self.pos.last()))
594 }
595 },
596 State::Exponent => match self.peek_char()? {
597 Some('0'..='9') => State::Exponent,
598 _ => break,
599 },
600 };
601
602 buffer.push(self.expect_char()?);
603 }
604
605 let n = match state {
606 State::Integer => NumericLiteral::Integer(unsafe { IntegerBuf::new_unchecked(buffer) }),
607 State::Decimal => NumericLiteral::Decimal(unsafe { DecimalBuf::new_unchecked(buffer) }),
608 State::Exponent => NumericLiteral::Double(unsafe { DoubleBuf::new_unchecked(buffer) }),
609 _ => unreachable!(),
610 };
611
612 Ok((NumericOrPeriod::Numeric(n), self.pos.current()))
613 }
614
615 fn next_blank_node_label(&mut self) -> Result<(BlankIdBuf, Span), (Error<E>, Span)> {
617 match self.next_char()? {
618 Some(':') => {
619 let mut label = String::new();
620 label.push('_');
621 label.push(':');
622 match self.next_char()? {
623 Some(c) if c.is_ascii_digit() || is_pn_chars_u(c) => {
624 label.push(c);
625 let mut last_is_pn_chars = true;
626 loop {
627 match self.peek_char()? {
628 Some(c) if is_pn_chars(c) => {
629 label.push(self.expect_char()?);
630 last_is_pn_chars = true
631 }
632 Some('.') => {
633 label.push(self.expect_char()?);
634 last_is_pn_chars = false;
635 }
636 _ if last_is_pn_chars => break,
637 unexpected => {
638 return Err((
639 Error::Unexpected(unexpected.into()),
640 self.pos.last(),
641 ))
642 }
643 }
644 }
645
646 Ok((
647 unsafe { BlankIdBuf::new_unchecked(label) },
648 self.pos.current(),
649 ))
650 }
651 unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
652 }
653 }
654 unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
655 }
656 }
657
658 fn next_escape(&mut self) -> Result<char, (Error<E>, Span)> {
659 match self.next_char()? {
660 Some(
661 c @ ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
662 | ';' | '=' | '/' | '?' | '#' | '@' | '%'),
663 ) => Ok(c),
664 unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
665 }
666 }
667
668 fn next_name_or_keyword(&mut self, c: char) -> Result<(NameOrKeyword, Span), (Error<E>, Span)> {
669 let namespace = match c {
671 ':' => (String::new(), self.pos.current()),
672 c if is_pn_chars_base(c) => {
673 let mut namespace = String::new();
674 namespace.push(c);
675 let mut last_is_pn_chars = true;
676 let span = loop {
677 match self.peek_char()? {
678 Some(c) if is_pn_chars(c) => {
679 namespace.push(self.expect_char()?);
680 last_is_pn_chars = true
681 }
682 Some('.') => {
683 namespace.push(self.expect_char()?);
684 last_is_pn_chars = false;
685 }
686 Some(':') if last_is_pn_chars => {
687 let span = self.pos.current();
688 self.expect_char()?;
689 break span;
690 }
691 unexpected => {
692 return if unexpected.map(|c| c.is_whitespace()).unwrap_or(true) {
693 match Keyword::from_str(&namespace) {
694 Ok(kw) => Ok((NameOrKeyword::Keyword(kw), self.pos.current())),
695 Err(NotAKeyword) => break self.pos.current(),
696 }
697 } else {
698 Err((Error::Unexpected(unexpected.into()), self.pos.end()))
699 }
700 }
701 }
702 };
703
704 (namespace, span)
705 }
706 unexpected => {
707 return Err((
708 Error::Unexpected(Unexpected::Char(unexpected)),
709 self.pos.last(),
710 ))
711 }
712 };
713
714 let mut suffix = String::new();
716 let mut suffix_span = self.pos.current().next();
717 match self.peek_char()? {
718 Some(c) if is_pn_chars_u(c) || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') => {
719 let c = match self.expect_char()? {
720 '%' => {
721 self.next_hex_char(self.pos.current().end.into(), 2)?
723 }
724 '\\' => {
725 self.next_escape()?
727 }
728 c => c,
729 };
730
731 suffix.push(c);
732
733 loop {
734 match self.peek_char()? {
735 Some(c)
736 if is_pn_chars(c)
737 || c.is_ascii_digit() || matches!(c, ':' | '%' | '\\') =>
738 {
739 let c = match self.expect_char()? {
740 '%' => {
741 self.next_hex_char(self.pos.current().end.into(), 2)?
743 }
744 '\\' => {
745 self.next_escape()?
747 }
748 c => c,
749 };
750
751 suffix.push(c);
752 }
753 _ => {
754 suffix_span.end = self.pos.current().end;
756 break Ok((
757 NameOrKeyword::CompactIri(namespace, (suffix, suffix_span)),
758 self.pos.current(),
759 ));
760 }
761 }
762 }
763 }
764 _ => Ok((
765 NameOrKeyword::CompactIri(namespace, (String::new(), self.pos.current())),
766 self.pos.current(),
767 )),
768 }
769 }
770
771 pub fn consume(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
772 self.skip_whitespaces()?;
773 match self.next_char()? {
774 Some('@') => Ok({
775 let t = self.next_langtag_or_keyword()?;
776 let token = match t.0 {
777 LanguageTagOrKeyword::LanguageTag(tag) => Token::LangTag(tag),
778 LanguageTagOrKeyword::Keyword(kw) => Token::Keyword(kw),
779 };
780
781 (Some(token), t.1)
782 }),
783 Some('<') => Ok({
784 let t = self.next_iriref()?;
785 (Some(Token::IriRef(t.0)), t.1)
786 }),
787 Some('"') => Ok({
788 let t = self.next_string_literal('"')?;
789
790 (Some(Token::StringLiteral(t.0)), t.1)
791 }),
792 Some('\'') => Ok({
793 let t = self.next_string_literal('\'')?;
794
795 (Some(Token::StringLiteral(t.0)), t.1)
796 }),
797 Some('_') => Ok({
798 let t = self.next_blank_node_label()?;
799
800 (Some(Token::BlankNodeLabel(t.0)), t.1)
801 }),
802 Some(',') => Ok((Some(Token::Punct(Punct::Comma)), self.pos.current())),
803 Some(';') => Ok((Some(Token::Punct(Punct::Semicolon)), self.pos.current())),
804 Some('^') => match self.next_char()? {
805 Some('^') => Ok((Some(Token::Punct(Punct::Carets)), self.pos.current())),
806 unexpected => Err((Error::Unexpected(unexpected.into()), self.pos.last())),
807 },
808 Some('(') => Ok((
809 Some(Token::Begin(Delimiter::Parenthesis)),
810 self.pos.current(),
811 )),
812 Some('[') => Ok((Some(Token::Begin(Delimiter::Bracket)), self.pos.current())),
813 Some(')') => Ok((Some(Token::End(Delimiter::Parenthesis)), self.pos.current())),
814 Some(']') => Ok((Some(Token::End(Delimiter::Bracket)), self.pos.current())),
815 Some(c @ ('+' | '-' | '0'..='9' | '.')) => Ok({
816 let t = self.next_numeric_or_dot(c)?;
817
818 let token = match t.0 {
819 NumericOrPeriod::Numeric(n) => Token::Numeric(n),
820 NumericOrPeriod::Period => Token::Punct(Punct::Period),
821 };
822
823 (Some(token), t.1)
824 }),
825 Some(c) => Ok({
826 let t = self.next_name_or_keyword(c)?;
827
828 let token = match t.0 {
829 NameOrKeyword::Keyword(kw) => Token::Keyword(kw),
830 NameOrKeyword::CompactIri(p, s) => Token::CompactIri(p, s),
831 };
832
833 (Some(token), t.1)
834 }),
835 None => Ok((None, self.pos.end())),
836 }
837 }
838
839 #[allow(clippy::type_complexity)]
840 pub fn peek(&mut self) -> Result<(Option<&Token>, Span), (Error<E>, Span)> {
841 if self.lookahead.is_none() {
842 if let (Some(token), loc) = self.consume()? {
843 self.lookahead = Some((token, loc));
844 }
845 }
846
847 match &self.lookahead {
848 Some((token, loc)) => Ok((Some(token), *loc)),
849 None => Ok((None, self.pos.end())),
850 }
851 }
852
853 #[allow(clippy::type_complexity, clippy::should_implement_trait)]
854 pub fn next(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
855 match self.lookahead.take() {
856 Some((token, loc)) => Ok((Some(token), loc)),
857 None => self.consume(),
858 }
859 }
860}
861
862impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Tokens for Lexer<C, E> {
863 type Error = Error<E>;
864
865 fn peek(&mut self) -> Result<(Option<&Token>, Span), (Error<E>, Span)> {
866 self.peek()
867 }
868
869 fn next(&mut self) -> Result<(Option<Token>, Span), (Error<E>, Span)> {
870 self.next()
871 }
872
873 fn last(&self) -> Span {
874 self.pos.last_span
875 }
876}
877
878impl<E, C: Iterator<Item = Result<DecodedChar, E>>> Iterator for Lexer<C, E> {
879 type Item = Result<(Token, Span), (Error<E>, Span)>;
880
881 fn next(&mut self) -> Option<Self::Item> {
882 match self.next() {
883 Ok((Some(token), loc)) => Some(Ok((token, loc))),
884 Ok((None, _)) => None,
885 Err(e) => Some(Err(e)),
886 }
887 }
888}
889
/// Returns `true` if `c` belongs to the `PN_CHARS_BASE` character class:
/// ASCII letters plus the Unicode ranges listed below.
fn is_pn_chars_base(c: char) -> bool {
    /// Inclusive `(first, last)` bounds of every accepted range.
    const RANGES: [(char, char); 14] = [
        ('A', 'Z'),
        ('a', 'z'),
        ('\u{00c0}', '\u{00d6}'),
        ('\u{00d8}', '\u{00f6}'),
        ('\u{00f8}', '\u{02ff}'),
        ('\u{0370}', '\u{037d}'),
        ('\u{037f}', '\u{1fff}'),
        ('\u{200c}', '\u{200d}'),
        ('\u{2070}', '\u{218f}'),
        ('\u{2c00}', '\u{2fef}'),
        ('\u{3001}', '\u{d7ff}'),
        ('\u{f900}', '\u{fdcf}'),
        ('\u{fdf0}', '\u{fffd}'),
        ('\u{10000}', '\u{effff}'),
    ];

    RANGES.iter().any(|&(first, last)| (first..=last).contains(&c))
}
893
894fn is_pn_chars_u(c: char) -> bool {
895 is_pn_chars_base(c) || c == '_'
896}
897
898fn is_pn_chars(c: char) -> bool {
899 is_pn_chars_u(c)
900 || matches!(c, '-' | '0'..='9' | '\u{00b7}' | '\u{0300}'..='\u{036f}' | '\u{203f}'..='\u{2040}')
901}