use crate::error::{ParseError, Result};
use crate::types::{Span, StringPart, Term, Token};
use super::Parser;
/// Punctuation that `is_scheme_char` accepts in addition to alphanumeric
/// characters.
const URI_SCHEME_SPECIAL_CHARS: &[char] = &['-', '.', '+'];
/// Punctuation that `is_uri_char` accepts in addition to alphanumeric
/// characters.
const URI_SPECIAL_CHARS: &[char] = &[
'~', '!', '@', '$', '%', '&', '*', '-', '=', '_', '+', ':', '\'', ',', '.', '/', '?',
];
/// Returns `true` when `c` is allowed in a URI scheme: either
/// `char::is_alphanumeric` holds for it, or it is listed in
/// `URI_SCHEME_SPECIAL_CHARS`.
fn is_scheme_char(c: char) -> bool {
    if c.is_alphanumeric() {
        return true;
    }
    URI_SCHEME_SPECIAL_CHARS.contains(&c)
}
/// Returns `true` when `c` is allowed after the `scheme:` prefix of a URI:
/// either `char::is_alphanumeric` holds for it, or it is listed in
/// `URI_SPECIAL_CHARS`.
fn is_uri_char(c: char) -> bool {
    if c.is_alphanumeric() {
        return true;
    }
    URI_SPECIAL_CHARS.contains(&c)
}
/// Returns `true` when `c` may appear inside a path segment: one of
/// `.`, `_`, `-`, `+`, `~`, or any character for which
/// `char::is_alphanumeric` holds.
fn is_path_char(c: char) -> bool {
    match c {
        '.' | '_' | '-' | '+' | '~' => true,
        other => other.is_alphanumeric(),
    }
}
/// Appends the text `s` to `parts`.
///
/// When the last element is already a `StringPart::TextPart`, `s` is
/// concatenated onto it so that adjacent text stays in a single part;
/// otherwise (empty list, or a non-text last element) a new `TextPart`
/// holding `s` is pushed.
fn push_path_text(parts: &mut Vec<StringPart>, s: &str) {
    match parts.last_mut() {
        Some(StringPart::TextPart(tail)) => tail.push_str(s),
        _ => parts.push(StringPart::TextPart(s.to_owned())),
    }
}
impl Parser {
pub(super) fn looks_like_uri(&self) -> bool {
let Token::Identifier(scheme) = &self.current.value else {
return false;
};
if self.lexer.peek() != Some(':') {
return false;
}
if !matches!(self.lexer.peek_ahead(1), Some(c) if is_uri_char(c)) {
return false;
}
scheme.chars().all(is_scheme_char)
}
pub(super) fn parse_path(&mut self) -> Result<Term> {
let ann = self.with_raw_ann(|p| {
let mut parts = Vec::new();
match &p.current.value {
Token::Identifier(ident) => parts.push(StringPart::TextPart(ident.to_string())),
Token::TDot if p.lexer.peek() == Some('.') => {
p.lexer.advance();
push_path_text(&mut parts, "..");
}
Token::TDot => push_path_text(&mut parts, "."),
Token::TDiv => push_path_text(&mut parts, "/"),
Token::TTilde => push_path_text(&mut parts, "~"),
_ => {}
}
loop {
match p.lexer.peek() {
Some('$') if p.lexer.at("${") => {
parts.push(p.parse_string_interpolation()?);
}
Some(ch) if ch.is_alphanumeric() || matches!(ch, '.' | '_' | '-' | '+') => {
push_path_text(&mut parts, &p.parse_path_part());
}
Some('/') => {
p.lexer.advance();
push_path_text(&mut parts, "/");
}
_ => break,
}
}
if let Some(StringPart::TextPart(text)) = parts.last()
&& text.ends_with('/')
{
let current_pos = p.lexer.current_pos().start as usize;
let slash_pos = Span::new(current_pos.saturating_sub(1), current_pos);
return Err(ParseError::invalid(
slash_pos,
"path cannot end with a trailing slash",
Some("remove the trailing '/' or add more path components".to_string()),
));
}
Ok(parts)
})?;
Ok(Term::Path(ann))
}
fn parse_path_part(&mut self) -> String {
let mut text = String::new();
while let Some(ch) = self.lexer.peek().filter(|&c| is_path_char(c)) {
text.push(ch);
self.lexer.advance();
}
text
}
pub(super) fn parse_uri(&mut self) -> Result<Term> {
let ann = self.with_raw_ann(|p| {
let Token::Identifier(scheme) = &p.current.value else {
unreachable!("looks_like_uri guards this")
};
let mut uri_text = scheme.clone();
debug_assert_eq!(p.lexer.peek(), Some(':'));
p.lexer.advance();
uri_text.push(':');
while let Some(ch) = p.lexer.peek() {
if is_uri_char(ch) {
uri_text.push(ch);
p.lexer.advance();
} else {
break;
}
}
Ok(vec![vec![StringPart::TextPart(uri_text.to_string())]])
})?;
Ok(Term::SimpleString(ann))
}
fn is_path_content_at(&self, offset: usize) -> bool {
match self.lexer.peek_ahead(offset) {
Some(c) if is_path_char(c) => true,
Some('$') => self.lexer.peek_ahead(offset + 1) == Some('{'),
_ => false,
}
}
const fn has_preceding_whitespace(&self) -> bool {
self.lexer.recent_hspace > 0 || self.lexer.recent_newlines > 0
}
pub(super) fn looks_like_path(&self) -> bool {
match &self.current.value {
Token::Identifier(_) => {
self.lexer.peek() == Some('/')
&& !self.lexer.at("//")
&& self.is_path_content_at(1)
&& !self.has_preceding_whitespace()
}
Token::TDot => match (self.lexer.peek(), self.lexer.peek_ahead(1)) {
(Some('/'), _) => self.is_path_content_at(1), (Some('.'), Some('/')) => self.is_path_content_at(2), _ => false,
},
Token::TDiv => self.is_path_content_at(0) && !self.has_preceding_whitespace(),
Token::TTilde => self.lexer.peek() == Some('/') && self.is_path_content_at(1),
_ => false,
}
}
}