use crate::error::{RuleRecognizerError, TurtleParseError, TurtleResult};
use crate::toolkit::lexer::{TokenOrLineJump, TokenRecognizer};
use std::io::{BufRead, Read};
use std::marker::PhantomData;
/// A resumable grammar-rule state machine driven one token at a time.
///
/// `recognize_next` consumes the current recognizer state and returns the
/// successor state. Completed values are pushed into `results` and problems
/// into `errors` instead of being returned, so a single token may yield zero
/// or more of each. The by-value `self -> Self` threading is why drivers such
/// as `StreamingParser` need `P: Clone` to step the machine in place.
pub trait RuleRecognizer {
    /// The tokenizer whose tokens this recognizer consumes.
    type TokenRecognizer: TokenRecognizer;
    /// The value produced when a grammar rule completes.
    type Output;
    /// Mutable, rule-independent parsing state shared across all tokens.
    type Context;
    /// Advances the state machine with one token (or a line-jump marker,
    /// per `TokenOrLineJump`) and returns the next state.
    ///
    /// Appends any completed `Output` values to `results` and any
    /// recognition problems to `errors`.
    fn recognize_next(
        self,
        token: TokenOrLineJump<<Self::TokenRecognizer as TokenRecognizer>::Token<'_>>,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self;
}
/// Streaming parser that pairs a tokenizer with a grammar-rule recognizer
/// and yields parsed values one at a time through its `Iterator` impl.
///
/// Note: no `PhantomData<P>` is needed — the struct already owns a `P`
/// directly via `rule_recognizer`, which fully constrains the type parameter.
pub struct StreamingParser<R, T: crate::toolkit::lexer::TokenRecognizer, P: RuleRecognizer> {
    /// Produces tokens (or tokenization errors) from the underlying reader.
    tokenizer: crate::toolkit::lexer::StreamingTokenizer<R, T>,
    /// Current state of the grammar state machine; replaced after each token.
    rule_recognizer: P,
    /// Shared mutable parsing state handed to the recognizer on every step.
    context: P::Context,
}
impl<R: BufRead, T: TokenRecognizer, P: RuleRecognizer<TokenRecognizer = T>>
    StreamingParser<R, T, P>
{
    /// Assembles a parser from its three collaborating parts.
    ///
    /// `rule_recognizer` is the initial state of the grammar state machine;
    /// `context` is whatever shared state that recognizer requires.
    pub fn new(
        tokenizer: crate::toolkit::lexer::StreamingTokenizer<R, T>,
        rule_recognizer: P,
        context: P::Context,
    ) -> Self {
        Self {
            tokenizer,
            rule_recognizer,
            context,
        }
    }
}
impl<R: BufRead, T: TokenRecognizer, P: RuleRecognizer<TokenRecognizer = T>> Iterator
    for StreamingParser<R, T, P>
where
    P: Clone,
{
    type Item = TurtleResult<P::Output>;

    /// Pulls tokens until the recognizer either produces a value, reports an
    /// error, or the tokenizer is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // End of input: stop. Tokenizer errors are surfaced as syntax errors.
            let token = match self.tokenizer.next()? {
                Ok(token) => token,
                Err(e) => {
                    return Some(Err(TurtleParseError::syntax(
                        crate::error::TurtleSyntaxError::Generic {
                            message: e.to_string(),
                            position: self.tokenizer.position(),
                        },
                    )));
                }
            };
            let mut produced = Vec::new();
            let mut problems = Vec::new();
            // `recognize_next` consumes the state machine by value, so we
            // step a clone and store the successor state back.
            let advanced = self.rule_recognizer.clone().recognize_next(
                token,
                &mut self.context,
                &mut produced,
                &mut problems,
            );
            self.rule_recognizer = advanced;
            // Only the first reported problem is surfaced per token.
            if let Some(first_issue) = problems.first() {
                return Some(Err(TurtleParseError::syntax(
                    crate::error::TurtleSyntaxError::Generic {
                        message: format!("Rule recognition error: {:?}", first_issue),
                        position: self.tokenizer.position(),
                    },
                )));
            }
            // Likewise only the first produced value is yielded; any extras
            // from this token are dropped. Otherwise keep consuming tokens.
            if let Some(first_value) = produced.into_iter().next() {
                return Some(Ok(first_value));
            }
        }
    }
}
/// Blocking parsing entry points implemented by each concrete syntax parser.
pub trait Parser<Output> {
    /// Reads and parses the entire input eagerly, returning all parsed
    /// values at once, or the first error encountered.
    fn parse<R: Read>(&self, reader: R) -> TurtleResult<Vec<Output>>;
    /// Wraps `reader` in a lazy iterator yielding one parse result at a
    /// time; `'static` is required because the reader is moved into the
    /// boxed iterator.
    fn for_reader<R: BufRead + 'static>(
        &self,
        reader: R,
    ) -> Box<dyn Iterator<Item = TurtleResult<Output>>>;
}
/// Async (Tokio) counterparts to [`Parser`]; compiled only with the
/// `async-tokio` feature.
#[cfg(feature = "async-tokio")]
pub trait AsyncParser<Output> {
    /// Asynchronously reads and parses the whole input, returning all parsed
    /// values, or the first error. The returned future is `Send` so it can
    /// be awaited on multi-threaded runtimes.
    fn parse_async<R: tokio::io::AsyncRead + Unpin>(
        &self,
        reader: R,
    ) -> impl std::future::Future<Output = TurtleResult<Vec<Output>>> + Send;
    /// Wraps `reader` in a `Stream` yielding one parse result at a time.
    fn for_async_reader<R: tokio::io::AsyncBufRead + Unpin>(
        &self,
        reader: R,
    ) -> Box<dyn futures::Stream<Item = TurtleResult<Output>> + Unpin>;
}
/// Mutable state shared across a single parse run: the base IRI used for
/// relative-reference resolution, the prefix table populated by prefix
/// declarations, and a counter for generating fresh blank-node labels.
#[derive(Debug, Clone, Default)]
pub struct ParsingContext {
    /// Base IRI against which `#fragment` and `/absolute-path` references
    /// are resolved; `None` leaves relative references untouched.
    pub base_iri: Option<String>,
    /// Maps prefix labels (e.g. `"ex"`) to their namespace IRIs.
    pub prefixes: std::collections::HashMap<String, String>,
    /// Monotonic counter backing [`Self::generate_blank_node_id`].
    pub blank_node_counter: usize,
}
impl ParsingContext {
    /// Creates an empty context: no base IRI, no prefixes, counter at zero.
    pub fn new() -> Self {
        Self::default()
    }
    /// Builder-style setter for the base IRI.
    pub fn with_base_iri(mut self, base_iri: String) -> Self {
        self.base_iri = Some(base_iri);
        self
    }
    /// Registers (or overwrites) a prefix-to-namespace binding.
    pub fn add_prefix(&mut self, prefix: String, iri: String) {
        self.prefixes.insert(prefix, iri);
    }
    /// Expands `prefix:local` to a full IRI by simple concatenation, or
    /// returns `None` when the prefix has not been declared.
    pub fn resolve_prefixed_name(&self, prefix: &str, local: &str) -> Option<String> {
        self.prefixes.get(prefix).map(|iri| format!("{iri}{local}"))
    }
    /// Returns a fresh blank-node label of the form `_:bN`, with `N`
    /// strictly increasing per call.
    pub fn generate_blank_node_id(&mut self) -> String {
        let id = format!("_:b{}", self.blank_node_counter);
        self.blank_node_counter += 1;
        id
    }
    /// Resolves `iri` against the configured base IRI.
    ///
    /// Implements the two reference forms this parser needs from
    /// RFC 3986 §5.3:
    /// - `#fragment` replaces the base's fragment (if any);
    /// - `/absolute-path` replaces the base's path, keeping scheme + authority.
    ///
    /// All other inputs — full IRIs and bare relative paths alike — are
    /// returned unchanged; full relative-path merging is intentionally not
    /// implemented here.
    pub fn resolve_iri(&self, iri: &str) -> String {
        let Some(base) = self.base_iri.as_deref() else {
            return iri.to_string();
        };
        if iri.starts_with('#') {
            // Same-document reference: drop any fragment already on the base
            // before appending, so `base#old` + `#new` yields `base#new`.
            let stem = base.split('#').next().unwrap_or(base);
            format!("{stem}{iri}")
        } else if iri.starts_with('/') {
            // Absolute-path reference: keep only `scheme://authority` from
            // the base so `http://h/a/b` + `/c` yields `http://h/c`.
            // A base with no `://` (e.g. `urn:` IRIs) falls back to plain
            // concatenation, matching the previous behavior.
            let authority_end = base
                .find("://")
                .map(|scheme_end| scheme_end + 3)
                .and_then(|auth_start| {
                    base[auth_start..].find('/').map(|slash| auth_start + slash)
                })
                .unwrap_or(base.len());
            format!("{}{}", &base[..authority_end], iri)
        } else {
            iri.to_string()
        }
    }
}