use crate::MIN_PARALLEL_CHUNK_SIZE;
use crate::chunker::get_turtle_slice_chunks;
use crate::terse::TriGRecognizer;
#[cfg(feature = "async-tokio")]
use crate::toolkit::TokioAsyncReaderIterator;
use crate::toolkit::{Parser, ReaderIterator, SliceIterator, TurtleParseError, TurtleSyntaxError};
#[cfg(feature = "async-tokio")]
use crate::trig::TokioAsyncWriterTriGSerializer;
use crate::trig::{LowLevelTriGSerializer, TriGSerializer, WriterTriGSerializer};
use oxiri::{Iri, IriParseError};
use oxrdf::{GraphNameRef, Triple, TripleRef};
use std::collections::HashMap;
use std::collections::hash_map::Iter;
use std::io::{self, Read, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncWrite};
/// Builder that collects the configuration of a Turtle parser.
///
/// The builder methods take `self` by value and return it, so calls can be chained.
/// The configuration is finally consumed by `for_reader`, `for_slice`,
/// `split_slice_for_parallel_parsing` or `low_level`.
#[derive(Default, Clone)]
#[must_use]
pub struct TurtleParser {
/// Lenient-mode flag; `false` by default, switched on by `lenient()`.
lenient: bool,
/// Base IRI configured through `with_base_iri`, `None` when unset.
base: Option<Iri<String>>,
/// Prefix name to IRI map filled through `with_prefix`.
prefixes: HashMap<String, Iri<String>>,
}
impl TurtleParser {
    /// Builds a parser with the default configuration:
    /// not lenient, no base IRI and no predefined prefix.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Switches the parser to lenient mode.
    ///
    /// The flag is only recorded here and forwarded to the underlying recognizer
    /// when a concrete parser is built.
    #[inline]
    pub fn lenient(mut self) -> Self {
        self.lenient = true;
        self
    }

    /// Deprecated alias of [`TurtleParser::lenient`].
    #[deprecated(note = "Use `lenient()` instead", since = "0.2.0")]
    #[inline]
    pub fn unchecked(self) -> Self {
        self.lenient()
    }

    /// Sets the base IRI handed to the parser.
    ///
    /// # Errors
    ///
    /// Returns an [`IriParseError`] if `base_iri` cannot be parsed as an IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }

    /// Registers a prefix that is known to the parser before any input is read.
    ///
    /// Registering the same `prefix_name` twice keeps only the last IRI.
    ///
    /// # Errors
    ///
    /// Returns an [`IriParseError`] if `prefix_iri` cannot be parsed as an IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes
            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
        Ok(self)
    }

    /// Builds an iterator of triples that pulls its input from a [`Read`] implementation.
    pub fn for_reader<R: Read>(self, reader: R) -> ReaderTurtleParser<R> {
        ReaderTurtleParser {
            inner: self.low_level().parser.for_reader(reader),
        }
    }

    /// Builds a parser that pulls its input from a Tokio [`AsyncRead`] implementation.
    #[cfg(feature = "async-tokio")]
    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
        self,
        reader: R,
    ) -> TokioAsyncReaderTurtleParser<R> {
        TokioAsyncReaderTurtleParser {
            inner: self.low_level().parser.for_tokio_async_reader(reader),
        }
    }

    /// Builds an iterator of triples over an in-memory byte slice.
    pub fn for_slice(self, slice: &(impl AsRef<[u8]> + ?Sized)) -> SliceTurtleParser<'_> {
        SliceTurtleParser {
            // NOTE(review): the two boolean literals are positional flags of
            // `TriGRecognizer::new_parser`; presumably "input is complete" and
            // "graph names allowed" - confirm against its signature.
            inner: TriGRecognizer::new_parser(
                slice.as_ref(),
                true,
                false,
                self.lenient,
                self.base,
                self.prefixes,
            )
            .into_iter(),
        }
    }

    /// Splits `slice` into several parsers that can each be driven independently.
    ///
    /// The number of chunks is `slice.len() / MIN_PARALLEL_CHUNK_SIZE`, bounded below by 1
    /// and above by `target_parallelism`. A `target_parallelism` of 0 is treated as 1,
    /// so at least one chunk is always requested.
    ///
    /// When more than one chunk is requested, a throwaway parser is first advanced by one
    /// item over the whole slice and the prefixes it has collected at that point are copied
    /// into this configuration, so every chunk parser starts with them.
    ///
    /// # Panics
    ///
    /// Panics if a prefix IRI collected by the throwaway parser is rejected by
    /// [`TurtleParser::with_prefix`].
    pub fn split_slice_for_parallel_parsing(
        mut self,
        slice: &(impl AsRef<[u8]> + ?Sized),
        target_parallelism: usize,
    ) -> Vec<SliceTurtleParser<'_>> {
        let slice = slice.as_ref();
        // `clamp` panics when its lower bound exceeds its upper bound, hence the `max(1)`
        // guard for callers passing a parallelism of 0.
        let n_chunks =
            (slice.len() / MIN_PARALLEL_CHUNK_SIZE).clamp(1, target_parallelism.max(1));
        if n_chunks > 1 {
            // The chunk boundaries are computed with `self`, so the prefixes have to be
            // gathered before `get_turtle_slice_chunks` is called.
            let mut from_slice_parser = self.clone().for_slice(slice);
            // The outcome is deliberately ignored: the call only serves to make the parser
            // consume the beginning of the document.
            from_slice_parser.next();
            for (p, iri) in from_slice_parser.prefixes() {
                // NOTE(review): presumably these IRIs were already validated by the parser,
                // which would make the `unwrap` infallible - confirm this also holds in
                // lenient mode.
                self = self.with_prefix(p, iri).unwrap();
            }
        }
        get_turtle_slice_chunks(slice, n_chunks, &self)
            .into_iter()
            .map(|(start, end)| self.clone().for_slice(&slice[start..end]))
            .collect()
    }

    /// Builds a push-style parser that is fed with byte chunks by the caller.
    pub fn low_level(self) -> LowLevelTurtleParser {
        LowLevelTurtleParser {
            // Starts from an empty buffer; data arrives later through `extend_from_slice`.
            parser: TriGRecognizer::new_parser(
                Vec::new(),
                false,
                false,
                self.lenient,
                self.base,
                self.prefixes,
            ),
        }
    }
}
/// Turtle parser reading from a [`Read`] implementation.
///
/// Built by `TurtleParser::for_reader`; implements [`Iterator`] over the parsed triples.
#[must_use]
pub struct ReaderTurtleParser<R: Read> {
/// Reader-backed iterator driving the shared `TriGRecognizer`.
inner: ReaderIterator<R, TriGRecognizer>,
}
impl<R: Read> ReaderTurtleParser<R> {
    /// Iterates over the `(prefix name, prefix IRI)` pairs currently held by the parser context.
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.inner.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// Returns the base IRI currently held by the parser context, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        let lexer_options = &self.inner.parser.context.lexer_options;
        let base = lexer_options.base_iri.as_ref()?;
        Some(base.as_str())
    }
}
impl<R: Read> Iterator for ReaderTurtleParser<R> {
    type Item = Result<Triple, TurtleParseError>;

    /// Pulls the next item from the inner iterator and converts a successful value into a [`Triple`].
    fn next(&mut self) -> Option<Self::Item> {
        let parsed = self.inner.next()?;
        Some(parsed.map(Into::into))
    }
}
/// Turtle parser reading from a Tokio [`AsyncRead`] implementation.
///
/// Built by `TurtleParser::for_tokio_async_reader`; only compiled with the
/// `async-tokio` feature.
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderTurtleParser<R: AsyncRead + Unpin> {
/// Async-reader-backed iterator driving the shared `TriGRecognizer`.
inner: TokioAsyncReaderIterator<R, TriGRecognizer>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderTurtleParser<R> {
    /// Awaits the next item of the inner iterator and converts a successful value into a [`Triple`].
    ///
    /// Returns `None` once the inner iterator is exhausted.
    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
        let parsed = self.inner.next().await?;
        Some(parsed.map(Into::into))
    }

    /// Iterates over the `(prefix name, prefix IRI)` pairs currently held by the parser context.
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.inner.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// Returns the base IRI currently held by the parser context, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        let lexer_options = &self.inner.parser.context.lexer_options;
        let base = lexer_options.base_iri.as_ref()?;
        Some(base.as_str())
    }
}
/// Turtle parser reading from a borrowed byte slice.
///
/// Built by `TurtleParser::for_slice` and `TurtleParser::split_slice_for_parallel_parsing`;
/// implements [`Iterator`] over the parsed triples.
#[must_use]
pub struct SliceTurtleParser<'a> {
/// Slice-backed iterator driving the shared `TriGRecognizer`.
inner: SliceIterator<'a, TriGRecognizer>,
}
impl SliceTurtleParser<'_> {
    /// Iterates over the `(prefix name, prefix IRI)` pairs currently held by the parser context.
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.inner.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// Returns the base IRI currently held by the parser context, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        let lexer_options = &self.inner.parser.context.lexer_options;
        let base = lexer_options.base_iri.as_ref()?;
        Some(base.as_str())
    }
}
impl Iterator for SliceTurtleParser<'_> {
    type Item = Result<Triple, TurtleSyntaxError>;

    /// Pulls the next item from the inner iterator and converts a successful value into a [`Triple`].
    fn next(&mut self) -> Option<Self::Item> {
        let parsed = self.inner.next()?;
        Some(parsed.map(Into::into))
    }
}
/// Push-style Turtle parser built by `TurtleParser::low_level`.
///
/// The caller feeds bytes with `extend_from_slice`, signals the end of the input with `end`
/// and pulls results with `parse_next`.
pub struct LowLevelTurtleParser {
/// Underlying parser working on an owned byte buffer.
parser: Parser<Vec<u8>, TriGRecognizer>,
}
impl LowLevelTurtleParser {
    /// Hands more input bytes over to the underlying parser.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other);
    }

    /// Signals to the underlying parser that no more input will be provided.
    pub fn end(&mut self) {
        self.parser.end();
    }

    /// Reports whether the underlying parser considers itself at its end.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Asks the underlying parser for its next item and converts a successful value
    /// into a [`Triple`].
    ///
    /// Returns `None` when the underlying parser has nothing to return.
    pub fn parse_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
        let parsed = self.parser.parse_next()?;
        Some(parsed.map(Into::into))
    }

    /// Iterates over the `(prefix name, prefix IRI)` pairs currently held by the parser context.
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// Returns the base IRI currently held by the parser context, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        let lexer_options = &self.parser.context.lexer_options;
        let base = lexer_options.base_iri.as_ref()?;
        Some(base.as_str())
    }
}
/// Iterator over the prefixes known to a parser, returned by the `prefixes()` methods.
///
/// Yields `(prefix name, prefix IRI)` pairs as string slices.
pub struct TurtlePrefixesIter<'a> {
/// Borrowing iterator over the parser's prefix map.
inner: Iter<'a, String, Iri<String>>,
}
impl<'a> Iterator for TurtlePrefixesIter<'a> {
    type Item = (&'a str, &'a str);

    /// Yields the next map entry as a `(prefix name, prefix IRI)` pair of string slices.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner
            .next()
            .map(|(name, iri)| (name.as_str(), iri.as_str()))
    }

    /// Forwards the size hint of the underlying map iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let hint = self.inner.size_hint();
        hint
    }
}
/// Builder that collects the configuration of a Turtle serializer.
///
/// It is a thin wrapper around a `TriGSerializer`: every builder method delegates to it.
#[derive(Default, Clone)]
#[must_use]
pub struct TurtleSerializer {
/// Wrapped TriG serializer configuration.
inner: TriGSerializer,
}
impl TurtleSerializer {
    /// Builds a serializer with the default configuration.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers a prefix on the wrapped TriG serializer.
    ///
    /// # Errors
    ///
    /// Propagates the [`IriParseError`] reported by the wrapped serializer.
    #[inline]
    pub fn with_prefix(
        self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        let inner = self.inner.with_prefix(prefix_name, prefix_iri)?;
        Ok(Self { inner })
    }

    /// Sets the base IRI on the wrapped TriG serializer.
    ///
    /// # Errors
    ///
    /// Propagates the [`IriParseError`] reported by the wrapped serializer.
    #[inline]
    pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        let inner = self.inner.with_base_iri(base_iri)?;
        Ok(Self { inner })
    }

    /// Builds a serializer writing to a [`Write`] implementation.
    pub fn for_writer<W: Write>(self, writer: W) -> WriterTurtleSerializer<W> {
        let inner = self.inner.for_writer(writer);
        WriterTurtleSerializer { inner }
    }

    /// Builds a serializer writing to a Tokio [`AsyncWrite`] implementation.
    #[cfg(feature = "async-tokio")]
    pub fn for_tokio_async_writer<W: AsyncWrite + Unpin>(
        self,
        writer: W,
    ) -> TokioAsyncWriterTurtleSerializer<W> {
        let inner = self.inner.for_tokio_async_writer(writer);
        TokioAsyncWriterTurtleSerializer { inner }
    }

    /// Builds a serializer that does not own its output: the writer is supplied on each call.
    pub fn low_level(self) -> LowLevelTurtleSerializer {
        let inner = self.inner.low_level();
        LowLevelTurtleSerializer { inner }
    }
}
/// Turtle serializer writing to a [`Write`] implementation.
///
/// Built by `TurtleSerializer::for_writer`; `finish` must be called to get the writer back.
#[must_use]
pub struct WriterTurtleSerializer<W: Write> {
/// Wrapped TriG serializer that owns the writer.
inner: WriterTriGSerializer<W>,
}
impl<W: Write> WriterTurtleSerializer<W> {
    /// Serializes one triple by handing it to the wrapped TriG serializer
    /// as a quad of the default graph.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let triple: TripleRef<'a> = t.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.inner.serialize_quad(quad)
    }

    /// Finishes the wrapped serializer and returns the underlying writer.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub fn finish(self) -> io::Result<W> {
        let Self { inner } = self;
        inner.finish()
    }
}
/// Turtle serializer writing to a Tokio [`AsyncWrite`] implementation.
///
/// Built by `TurtleSerializer::for_tokio_async_writer`; only compiled with the
/// `async-tokio` feature.
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncWriterTurtleSerializer<W: AsyncWrite + Unpin> {
/// Wrapped asynchronous TriG serializer that owns the writer.
inner: TokioAsyncWriterTriGSerializer<W>,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterTurtleSerializer<W> {
    /// Serializes one triple by handing it to the wrapped TriG serializer
    /// as a quad of the default graph.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub async fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let triple: TripleRef<'a> = t.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.inner.serialize_quad(quad).await
    }

    /// Finishes the wrapped serializer and returns the underlying writer.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub async fn finish(self) -> io::Result<W> {
        let Self { inner } = self;
        inner.finish().await
    }
}
/// Turtle serializer that does not own its output, built by `TurtleSerializer::low_level`.
///
/// The writer to use is passed to each `serialize_triple` and `finish` call.
pub struct LowLevelTurtleSerializer {
/// Wrapped low-level TriG serializer.
inner: LowLevelTriGSerializer,
}
impl LowLevelTurtleSerializer {
    /// Serializes one triple into `writer` by handing it to the wrapped TriG serializer
    /// as a quad of the default graph.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub fn serialize_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        writer: impl Write,
    ) -> io::Result<()> {
        let triple: TripleRef<'a> = t.into();
        let quad = triple.in_graph(GraphNameRef::DefaultGraph);
        self.inner.serialize_quad(quad, writer)
    }

    /// Lets the wrapped serializer write whatever it still has to emit into `writer`.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error reported by the wrapped serializer.
    pub fn finish(&mut self, writer: impl Write) -> io::Result<()> {
        let outcome = self.inner.finish(writer);
        outcome
    }
}
#[cfg(test)]
#[expect(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};

    /// Serializes four triples and checks the exact Turtle text that is produced.
    #[test]
    fn test_write() -> io::Result<()> {
        let subject = NamedNodeRef::new_unchecked("http://example.com/s");
        let first_predicate = NamedNodeRef::new_unchecked("http://example.com/p");
        let second_predicate = NamedNodeRef::new_unchecked("http://example.com/p2");

        // Same triples, in the same order, as fed one by one to the serializer.
        let triples = [
            TripleRef::new(
                subject,
                first_predicate,
                NamedNodeRef::new_unchecked("http://example.com/o"),
            ),
            TripleRef::new(
                subject,
                first_predicate,
                LiteralRef::new_simple_literal("foo"),
            ),
            TripleRef::new(
                subject,
                second_predicate,
                LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
            ),
            TripleRef::new(
                BlankNodeRef::new_unchecked("b"),
                second_predicate,
                BlankNodeRef::new_unchecked("b2"),
            ),
        ];

        let mut serializer = TurtleSerializer::new().for_writer(Vec::new());
        for triple in triples {
            serializer.serialize_triple(triple)?;
        }

        let output = String::from_utf8(serializer.finish()?).map_err(io::Error::other)?;
        assert_eq!(
            output,
            "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n"
        );
        Ok(())
    }
}