use crate::compact::grammar::token_tws_no_case;
use crate::compact::grammar_structs::{Cardinality, NumericLength, NumericRange, Qualifier, SenseFlags, ShExStatement};
use crate::compact::{
IRes, Span, grammar::map_error, grammar::tag_no_case_tws, grammar::token, grammar::token_tws, grammar::traced,
grammar::tws0, shex_parser_error::ParseError as ShExParseError,
};
use crate::iri_ref_or_wildcard::IriRefOrWildcard;
use crate::string_or_wildcard::StringOrWildcard;
use crate::{
Annotation, BNode, LangOrWildcard, NodeConstraint, NodeKind, NumericFacet, Pattern, SemAct, Shape, ShapeExpr,
ShapeExprLabel, StringFacet, TripleExpr, TripleExprLabel, XsFacet, iri_exclusion::IriExclusion,
language_exclusion::LanguageExclusion, literal_exclusion::LiteralExclusion, object_value::ObjectValue,
value_set_value::ValueSetValue,
};
use lazy_regex::{Lazy, regex};
use nom::bytes::complete::tag_no_case;
use nom::{
Err, Input, Parser,
branch::alt,
bytes::complete::{tag, take_while, take_while1},
character::complete::{alpha1, alphanumeric1, char, digit0, digit1, none_of, one_of, satisfy},
combinator::{cut, map, map_res, opt, recognize},
error::ErrorKind,
error_position,
multi::{count, fold_many0, many0, many1},
sequence::{delimited, pair, preceded},
};
use nom_locate::LocatedSpan;
use prefixmap::IriRef;
use regex::Regex;
use rudof_iri::IriS;
use rudof_rdf::rdf_core::{
RDFError,
term::literal::{ConcreteLiteral, Lang, NumericLiteral},
};
use std::{collections::VecDeque, fmt::Debug};
use thiserror::Error;
/// Builds the parser for a single ShEx statement.
///
/// Alternatives are tried in order: a directive, a `START` declaration, a
/// shape expression declaration, and finally a run of start actions. The
/// error constructor handed to `map_error` is `ShExParseError::ExpectedStatement`.
pub(crate) fn shex_statement<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    traced(
        "shex_statement",
        map_error(
            move |input| {
                let mut statement = alt((directive, start(), shape_expr_decl(), start_actions));
                statement.parse(input)
            },
            || ShExParseError::ExpectedStatement,
        ),
    )
}
/// Parses a directive: a `BASE`, `PREFIX` or `IMPORT` declaration, tried in that order.
fn directive(i: Span) -> IRes<ShExStatement> {
    let mut any_directive = alt((base_decl(), prefix_decl(), import_decl()));
    any_directive.parse(i)
}
/// Builds the parser for a `BASE <iri>` declaration.
///
/// The keyword is matched case-insensitively. Once it has matched, a missing
/// IRI reference is a hard failure (`cut`) rather than a recoverable error.
fn base_decl<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    traced(
        "base_decl",
        map_error(
            move |input| {
                let mut decl = (tag_no_case("BASE"), tws0, cut(iri_ref));
                let (rest, (_keyword, _ws, iri)) = decl.parse(input)?;
                Ok((rest, ShExStatement::BaseDecl { iri }))
            },
            || ShExParseError::ExpectedBaseDecl,
        ),
    )
}
/// Builds the parser for a `PREFIX <alias> <iri>` declaration.
///
/// The keyword is matched case-insensitively. After it, both the namespace
/// alias and the IRI reference are mandatory (`cut`). The alias is stored as
/// the fragment of the span matched by `pname_ns`.
fn prefix_decl<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    traced(
        "prefix_decl",
        map_error(
            move |input| {
                let mut decl = (tag_no_case("PREFIX"), tws0, cut(pname_ns), tws0, cut(iri_ref));
                let (rest, (_keyword, _, ns, _, iri)) = decl.parse(input)?;
                let statement = ShExStatement::PrefixDecl {
                    alias: ns.fragment(),
                    iri,
                };
                Ok((rest, statement))
            },
            || ShExParseError::ExpectedPrefixDecl,
        ),
    )
}
/// Builds the parser for an `IMPORT <iri>` declaration.
///
/// The keyword is matched case-insensitively; the IRI that follows is
/// mandatory (`cut`). The parsed IRI is logged at debug level.
fn import_decl<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    traced(
        "import_decl",
        map_error(
            move |input| {
                let mut decl = (tag_no_case("IMPORT"), tws0, cut(iri));
                let (rest, (_keyword, _ws, iri)) = decl.parse(input)?;
                tracing::debug!("grammar: Import {iri:?}");
                Ok((rest, ShExStatement::ImportDecl { iri }))
            },
            || ShExParseError::ExpectedImportDecl,
        ),
    )
}
/// Builds the parser for a `START = <inline shape expression>` declaration.
///
/// The keyword is matched case-insensitively. After it, both the `=` sign and
/// the shape expression are mandatory (`cut`).
fn start<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    map_error(
        move |input| {
            let mut decl = (
                tag_no_case("START"),
                tws0,
                cut(char('=')),
                tws0,
                cut(inline_shape_expression()),
            );
            decl.parse(input)
                .map(|(rest, (_, _, _, _, shape_expr))| (rest, ShExStatement::StartDecl { shape_expr }))
        },
        || ShExParseError::ExpectedStart,
    )
}
/// Parses one or more code declarations and wraps them as start actions.
fn start_actions(i: Span) -> IRes<ShExStatement> {
    many1(code_decl())
        .parse(i)
        .map(|(rest, actions)| (rest, ShExStatement::StartActions { actions }))
}
/// Builds the parser for a shape declaration:
/// an optional `abstract` keyword, a shape label, and a shape expression or `EXTERNAL`.
///
/// Once the label has been read, the shape expression is mandatory (`cut`).
fn shape_expr_decl<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShExStatement<'a>> {
    traced(
        "shape_expr_decl",
        map_error(
            move |input| {
                let mut decl = (
                    opt(tag_no_case_tws("abstract")),
                    shape_expr_label,
                    tws0,
                    cut(shape_expr_or_external()),
                );
                let (rest, (abstract_kw, shape_label, _, shape_expr)) = decl.parse(input)?;
                let statement = ShExStatement::ShapeDecl {
                    is_abstract: abstract_kw.is_some(),
                    shape_label,
                    shape_expr,
                };
                Ok((rest, statement))
            },
            || ShExParseError::ExpectedShapeExprDecl,
        ),
    )
}
/// Builds a parser that accepts either a full shape expression or the `EXTERNAL` keyword.
fn shape_expr_or_external<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    map_error(
        move |input| {
            let mut choice = alt((shape_expression(), external));
            choice.parse(input)
        },
        || ShExParseError::ShapeExprOrExternal,
    )
}
/// Parses the `EXTERNAL` keyword (case-insensitive) into an external shape expression.
fn external(i: Span) -> IRes<ShapeExpr> {
    tag_no_case("EXTERNAL")(i).map(|(rest, _keyword)| (rest, ShapeExpr::external()))
}
/// Builds the parser for a shape expression; the entry point is `shape_or`.
fn shape_expression<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    let parser = map_error(move |input| shape_or(input), || ShExParseError::ExpectedShapeExpr);
    traced("ShapeExpr", parser)
}
/// Builds the parser for an inline shape expression; the entry point is `inline_shape_or`.
fn inline_shape_expression<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    let parser = map_error(
        move |input| inline_shape_or(input),
        || ShExParseError::ExpectedInlineShapeExpr,
    );
    traced("inline_shape_expr", parser)
}
/// Parses `shape_and` operands joined by the `OR` symbol and combines them with `make_shape_or`.
///
/// The repetition itself is delegated to `many1_sep` (defined elsewhere in this file).
fn shape_or(i: Span<'_>) -> IRes<'_, ShapeExpr> {
    let or_separator = symbol("OR");
    many1_sep(shape_and, or_separator, make_shape_or, i)
}
/// Combines the operands of an `OR` chain into one shape expression.
///
/// A single operand is returned as-is (moved out of the vector, not cloned);
/// any other number of operands is wrapped with `ShapeExpr::or`.
fn make_shape_or(mut ses: Vec<ShapeExpr>) -> ShapeExpr {
    if ses.len() == 1 {
        // The length was just checked, so index 0 exists; taking ownership
        // avoids deep-copying the shape expression tree.
        ses.swap_remove(0)
    } else {
        ShapeExpr::or(ses)
    }
}
/// Inline variant of `shape_or`: `inline_shape_and` operands joined by the `OR` symbol.
fn inline_shape_or(i: Span) -> IRes<ShapeExpr> {
    let or_separator = symbol("OR");
    many1_sep(inline_shape_and, or_separator, make_shape_or, i)
}
/// Parses `shape_not` operands joined by the `AND` symbol and combines them with `make_shape_and`.
fn shape_and(i: Span) -> IRes<ShapeExpr> {
    let and_separator = symbol("AND");
    many1_sep(shape_not, and_separator, make_shape_and, i)
}
/// Combines the operands of an `AND` chain into one shape expression.
///
/// A single operand is returned as-is (moved out of the vector, not cloned);
/// any other number of operands is wrapped with `ShapeExpr::and`.
fn make_shape_and(mut ses: Vec<ShapeExpr>) -> ShapeExpr {
    if ses.len() == 1 {
        // The length was just checked, so index 0 exists; taking ownership
        // avoids deep-copying the shape expression tree.
        ses.swap_remove(0)
    } else {
        ShapeExpr::and(ses)
    }
}
/// Inline variant of `shape_and`: `inline_shape_not` operands joined by the `AND` symbol.
fn inline_shape_and(i: Span) -> IRes<ShapeExpr> {
    let and_separator = symbol("AND");
    many1_sep(inline_shape_not, and_separator, make_shape_and, i)
}
/// Parses an optional `NOT` followed by a shape atom.
///
/// When `NOT` is present the atom is wrapped with `ShapeExpr::shape_not`.
fn shape_not(i: Span) -> IRes<ShapeExpr> {
    let (i, negation) = opt(symbol("NOT")).parse(i)?;
    let (i, atom) = shape_atom()(i)?;
    let se = if negation.is_some() {
        ShapeExpr::shape_not(atom)
    } else {
        atom
    };
    Ok((i, se))
}
/// Parses an optional `NOT` followed by an inline shape atom.
///
/// When `NOT` is present the atom is wrapped with `ShapeExpr::shape_not`.
fn inline_shape_not(i: Span) -> IRes<ShapeExpr> {
    let (i, negation) = opt(symbol("NOT")).parse(i)?;
    let (i, atom) = inline_shape_atom()(i)?;
    let se = if negation.is_some() {
        ShapeExpr::shape_not(atom)
    } else {
        atom
    };
    Ok((i, se))
}
/// Builds the parser for a shape atom.
///
/// Alternatives, in order: a non-literal node constraint with an optional
/// shape or reference, a literal node constraint, a shape or reference with
/// an optional non-literal constraint, a parenthesised shape expression, `.`.
fn shape_atom<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "shape_atom",
        map_error(
            move |input| {
                let mut atom = alt((
                    non_lit_opt_shape_or_ref(),
                    lit_node_constraint_shape_expr(),
                    shape_opt_non_lit,
                    paren_shape_expr,
                    dot,
                ));
                atom.parse(input)
            },
            || ShExParseError::ShapeAtom,
        ),
    )
}
/// Builds the parser for a non-literal node constraint optionally followed by
/// a shape definition or shape reference.
///
/// With no trailing shape, the node constraint is returned alone. A trailing
/// `ShapeAnd` has its members appended after the node constraint in one flat
/// conjunction; any other trailing shape is paired with it via `make_shape_and`.
fn non_lit_opt_shape_or_ref<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "non_lit_opt_shape_or_ref",
        map_error(
            move |input| {
                let mut parser = (non_lit_node_constraint, tws0, cut(opt(shape_or_ref())));
                let (rest, (non_lit, _, trailing)) = parser.parse(input)?;
                let nc = ShapeExpr::node_constraint(non_lit);
                let combined = match trailing {
                    None => nc,
                    Some(ShapeExpr::ShapeAnd { shape_exprs }) => {
                        // Flatten: keep a single conjunction level.
                        let mut conjuncts = vec![nc];
                        conjuncts.extend(shape_exprs.into_iter().map(|wrapped| wrapped.se));
                        ShapeExpr::and(conjuncts)
                    },
                    Some(other) => make_shape_and(vec![nc, other]),
                };
                Ok((rest, combined))
            },
            || ShExParseError::NonLitNodeConstraintOptShapeOrRef,
        ),
    )
}
/// Parses a shape definition or reference, optionally followed by a non-literal node constraint.
///
/// When the constraint is present the two are joined with `ShapeExpr::and`.
fn shape_opt_non_lit(i: Span) -> IRes<ShapeExpr> {
    let (i, se) = shape_or_ref()(i)?;
    let (i, trailing) = opt(non_lit_node_constraint).parse(i)?;
    let result = match trailing {
        Some(nl) => ShapeExpr::and(vec![se, ShapeExpr::node_constraint(nl)]),
        None => se,
    };
    Ok((i, result))
}
/// Builds the parser for an inline shape atom.
///
/// Same alternatives as `shape_atom`, but the shape-bearing branches use the
/// inline shape parsers.
fn inline_shape_atom<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "inline_shape_atom",
        map_error(
            move |input| {
                let mut atom = alt((
                    non_lit_inline_opt_shape_or_ref(),
                    lit_node_constraint_shape_expr(),
                    inline_shape_or_ref_opt_non_lit,
                    paren_shape_expr,
                    dot,
                ));
                atom.parse(input)
            },
            || ShExParseError::ExpectedInlineShapeAtom,
        ),
    )
}
/// Builds the parser for a non-literal node constraint optionally followed by
/// an inline shape definition or shape reference.
///
/// When the trailing shape is present the two are combined via `make_shape_and`.
fn non_lit_inline_opt_shape_or_ref<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "non_lit_inline_nodeConstraint InlineShapeOr?",
        map_error(
            move |input| {
                let mut parser = (non_lit_node_constraint, tws0, opt(inline_shape_or_ref));
                let (rest, (non_lit, _, trailing)) = parser.parse(input)?;
                let nc = ShapeExpr::node_constraint(non_lit);
                let combined = if let Some(se) = trailing {
                    make_shape_and(vec![nc, se])
                } else {
                    nc
                };
                Ok((rest, combined))
            },
            || ShExParseError::NonLitInlineNodeConstraintOptShapeOrRef,
        ),
    )
}
/// Parses an inline shape definition or reference, optionally followed by a
/// non-literal node constraint (joined with `ShapeExpr::and` when present).
fn inline_shape_or_ref_opt_non_lit(i: Span) -> IRes<ShapeExpr> {
    let (i, se) = inline_shape_or_ref(i)?;
    let (i, trailing) = opt(non_lit_node_constraint).parse(i)?;
    let result = match trailing {
        Some(nl) => ShapeExpr::and(vec![se, ShapeExpr::node_constraint(nl)]),
        None => se,
    };
    Ok((i, result))
}
/// Builds a parser that reads a literal node constraint and wraps it as
/// `ShapeExpr::NodeConstraint`.
fn lit_node_constraint_shape_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "lit_node_constraint",
        map_error(
            move |input| lit_node_constraint()(input).map(|(rest, nc)| (rest, ShapeExpr::NodeConstraint(nc))),
            || ShExParseError::LitNodeConstraint,
        ),
    )
}
fn paren_shape_expr(i: Span) -> IRes<ShapeExpr> {
let (i, (_, _, se, _, _)) = (char('('), tws0, shape_expression(), tws0, char(')')).parse(i)?;
Ok((i, se))
}
fn dot(i: Span) -> IRes<ShapeExpr> {
let (i, (_, _)) = (tws0, char('.')).parse(i)?;
Ok((i, ShapeExpr::any()))
}
/// Builds a parser that accepts either a shape definition or a shape
/// reference (wrapped as `ShapeExpr::Ref`).
fn shape_or_ref<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "shape_or_ref",
        map_error(
            move |input| {
                let reference = map(shape_ref, ShapeExpr::Ref);
                alt((shape_definition(), reference)).parse(input)
            },
            || ShExParseError::ExpectedShapeOrRef,
        ),
    )
}
/// Parses either an inline shape definition or a shape reference (wrapped as `ShapeExpr::Ref`).
fn inline_shape_or_ref(i: Span) -> IRes<ShapeExpr> {
    let reference = map(shape_ref, ShapeExpr::Ref);
    alt((inline_shape_definition, reference)).parse(i)
}
/// Parses a shape reference, trying `at_pname_ln`, `at_pname_ns` and then
/// `@` followed by a shape label.
fn shape_ref(i: Span) -> IRes<ShapeExprLabel> {
    let mut reference = alt((at_pname_ln, at_pname_ns, at_shape_expr_label));
    reference.parse(i)
}
fn at_shape_expr_label(i: Span) -> IRes<ShapeExprLabel> {
let (i, (_, label)) = (char('@'), shape_expr_label).parse(i)?;
Ok((i, label))
}
/// Builds the parser for a literal node constraint.
///
/// Alternatives, in order: `LITERAL` with facets, a datatype with facets,
/// a value set with facets, or one or more numeric facets.
fn lit_node_constraint<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NodeConstraint> {
    traced(
        "lit_node_constraint",
        map_error(
            move |input| {
                let mut constraint = alt((literal_facets(), datatype_facets(), value_set_facets(), numeric_facets));
                constraint.parse(input)
            },
            || ShExParseError::LitNodeConstraint,
        ),
    )
}
fn literal_facets<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NodeConstraint> {
traced("literal_facets", move |i| {
let (i, (_, _, facets)) = (tag_no_case("LITERAL"), tws0, facets()).parse(i)?;
Ok((
i,
NodeConstraint::new()
.with_node_kind(NodeKind::Literal)
.with_xsfacets(facets),
))
})
}
/// Builds the parser for a datatype followed by zero or more facets.
fn datatype_facets<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NodeConstraint> {
    traced(
        "datatype_facets",
        map_error(
            move |input| {
                let mut parser = (datatype, tws0, facets());
                parser
                    .parse(input)
                    .map(|(rest, (dt, _, xs))| (rest, dt.with_xsfacets(xs)))
            },
            || ShExParseError::DatatypeFacets,
        ),
    )
}
/// Builds the parser for a value set followed by zero or more facets.
fn value_set_facets<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NodeConstraint> {
    traced(
        "value_set_facets",
        map_error(
            move |input| {
                let mut parser = (value_set(), tws0, facets());
                parser
                    .parse(input)
                    .map(|(rest, (vs, _, xs))| (rest, vs.with_xsfacets(xs)))
            },
            || ShExParseError::ValueSetFacets,
        ),
    )
}
/// Parses one or more numeric facets into a fresh node constraint.
fn numeric_facets(i: Span) -> IRes<NodeConstraint> {
    let (i, ns) = many1(numeric_facet()).parse(i)?;
    Ok((i, NodeConstraint::new().with_xsfacets(ns)))
}
/// Builds a parser for zero or more facets (string or numeric).
fn facets<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Vec<XsFacet>> {
    traced("facets", move |input| {
        let mut all_facets = many0(xs_facet());
        all_facets.parse(input)
    })
}
/// Parses a non-literal node constraint: a node kind with optional string
/// facets, or else one or more string facets on their own.
fn non_lit_node_constraint(i: Span) -> IRes<NodeConstraint> {
    let mut constraint = alt((non_literal_kind_string_facets, string_facets));
    constraint.parse(i)
}
/// Parses a non-literal node kind followed by zero or more string facets.
///
/// `with_xsfacets` is only called when at least one facet was read.
fn non_literal_kind_string_facets(i: Span) -> IRes<NodeConstraint> {
    let (i, (kind, facets)) = (non_literal_kind, many0(string_facet)).parse(i)?;
    let base = NodeConstraint::new().with_node_kind(kind);
    let nc = if facets.is_empty() {
        base
    } else {
        base.with_xsfacets(facets)
    };
    Ok((i, nc))
}
/// Parses one or more string facets into a fresh node constraint.
fn string_facets(i: Span) -> IRes<NodeConstraint> {
    many1(string_facet)
        .parse(i)
        .map(|(rest, facets)| (rest, NodeConstraint::new().with_xsfacets(facets)))
}
/// Parses one of the non-literal node kind keywords: `IRI`, `BNODE` or `NONLITERAL`.
fn non_literal_kind(i: Span) -> IRes<NodeKind> {
    let iri_kind = map(token_tws_no_case("IRI"), |_| NodeKind::Iri);
    let bnode_kind = map(token_tws_no_case("BNODE"), |_| NodeKind::BNode);
    let non_literal = map(token_tws_no_case("NONLITERAL"), |_| NodeKind::NonLiteral);
    alt((iri_kind, bnode_kind, non_literal)).parse(i)
}
/// Builds a parser for a single facet: a string facet first, otherwise a numeric facet.
fn xs_facet<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, XsFacet> {
    traced("xs_facet", move |input| {
        let mut facet = alt((string_facet, numeric_facet()));
        facet.parse(input)
    })
}
/// Parses a string facet: a length constraint, or a regular-expression pattern.
fn string_facet(i: Span) -> IRes<XsFacet> {
    let pattern = map(regexp, |p| XsFacet::StringFacet(StringFacet::Pattern(p)));
    alt((string_length, pattern)).parse(i)
}
/// Parses a string length facet, trying `MINLENGTH`, `MAXLENGTH` and `LENGTH` in that order.
fn string_length(i: Span) -> IRes<XsFacet> {
    let mut any_length = alt((min_length, max_length, length));
    any_length.parse(i)
}
fn min_length(i: Span) -> IRes<XsFacet> {
let (i, (_, _, n)) = (tag_no_case("MINLENGTH"), tws0, pos_integer).parse(i)?;
Ok((i, XsFacet::min_length(n)))
}
fn max_length(i: Span) -> IRes<XsFacet> {
let (i, (_, _, n)) = (tag_no_case("MAXLENGTH"), tws0, pos_integer).parse(i)?;
Ok((i, XsFacet::max_length(n)))
}
fn length(i: Span) -> IRes<XsFacet> {
let (i, (_, _, n)) = (tag_no_case("LENGTH"), tws0, pos_integer).parse(i)?;
Ok((i, XsFacet::length(n)))
}
fn pos_integer(i: Span) -> IRes<usize> {
let (i, n) = integer()(i)?;
let u: usize;
if n < 0 {
Err(Err::Error(error_position!(i, ErrorKind::Digit)))
} else {
u = n as usize;
Ok((i, u))
}
}
/// Builds a parser for a numeric facet: a range bound with a numeric literal,
/// or a digit-count facet with an integer.
fn numeric_facet<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, XsFacet> {
    traced("numeric_facet", move |input| {
        let mut facet = alt((numeric_range_lit(), numeric_length_int()));
        facet.parse(input)
    })
}
/// Builds a parser for a numeric range keyword followed by a numeric literal.
///
/// Once the keyword has matched, the literal is mandatory (`cut`).
fn numeric_range_lit<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, XsFacet> {
    traced("numeric_range", move |input| {
        let (rest, (bound, value)) = (numeric_range, cut(raw_numeric_literal())).parse(input)?;
        let facet = match bound {
            NumericRange::MinInclusive => NumericFacet::MinInclusive(value),
            NumericRange::MinExclusive => NumericFacet::MinExclusive(value),
            NumericRange::MaxInclusive => NumericFacet::MaxInclusive(value),
            NumericRange::MaxExclusive => NumericFacet::MaxExclusive(value),
        };
        Ok((rest, XsFacet::NumericFacet(facet)))
    })
}
fn numeric_length_int<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, XsFacet> {
traced("numeric_length_int", move |i| {
let (i, (numeric_length, n)) = (numeric_length, integer()).parse(i)?;
let nm = match numeric_length {
NumericLength::FractionDigits => XsFacet::NumericFacet(NumericFacet::FractionDigits(n as usize)),
NumericLength::TotalDigits => XsFacet::NumericFacet(NumericFacet::TotalDigits(n as usize)),
};
Ok((i, nm))
})
}
fn numeric_length(i: Span) -> IRes<NumericLength> {
alt((
map(token_tws("TOTALDIGITS"), |_| NumericLength::TotalDigits),
map(token_tws("FRACTIONDIGITS"), |_| NumericLength::FractionDigits),
))
.parse(i)
}
fn numeric_range(i: Span) -> IRes<NumericRange> {
alt((
map(token_tws("MININCLUSIVE"), |_| NumericRange::MinInclusive),
map(token_tws("MAXINCLUSIVE"), |_| NumericRange::MaxInclusive),
map(token_tws("MINEXCLUSIVE"), |_| NumericRange::MinExclusive),
map(token_tws("MAXEXCLUSIVE"), |_| NumericRange::MaxExclusive),
))
.parse(i)
}
/// Builds the parser for a shape definition:
/// qualifiers, `{`, an optional triple expression, `}`, annotations and
/// semantic actions.
///
/// Qualifiers are folded into the shape: `CLOSED` sets `closed` to
/// `Some(true)`, `EXTRA` predicates are concatenated, and extended shape
/// labels are collected. Empty collections are passed on as `None`.
fn shape_definition<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ShapeExpr> {
    traced(
        "shape_definition",
        map_error(
            move |input| {
                let mut definition = (
                    qualifiers(),
                    token_tws("{"),
                    maybe_triple_expr(),
                    token_tws("}"),
                    annotations,
                    tws0,
                    semantic_actions,
                );
                let (rest, (quals, _, triple_expr, _, annots, _, sem_acts)) = definition.parse(input)?;

                let closed = quals.contains(&Qualifier::Closed).then_some(true);
                let mut extra = Vec::new();
                let mut extends = Vec::new();
                for qual in quals {
                    match qual {
                        Qualifier::Extends(label) => extends.push(label),
                        Qualifier::Extra(predicates) => extra.extend(predicates),
                        Qualifier::Closed => {},
                    }
                }

                let maybe_extra = (!extra.is_empty()).then_some(extra);
                let maybe_extends = (!extends.is_empty()).then_some(extends);
                let maybe_annots = (!annots.is_empty()).then_some(annots);
                let shape = Shape::new(closed, maybe_extra, triple_expr)
                    .with_annotations(maybe_annots)
                    .with_sem_acts(sem_acts)
                    .with_extends(maybe_extends);
                Ok((rest, ShapeExpr::shape(shape)))
            },
            || ShExParseError::ExpectedShapeDefinition,
        ),
    )
}
/// Parses an inline shape definition: qualifiers, `{`, an optional triple
/// expression and `}` (no annotations or semantic actions).
///
/// Qualifiers are folded into the shape exactly as in `shape_definition`:
/// `CLOSED` sets `closed` to `Some(true)`, `EXTRA` predicates are
/// concatenated, and extended shape labels are attached via `with_extends`.
/// An `EXTENDS`/`&` qualifier used to hit a `todo!()` and panic on user input.
fn inline_shape_definition(i: Span) -> IRes<ShapeExpr> {
    let (i, (qualifiers, _, maybe_triple_expr, _)) =
        (qualifiers(), token_tws("{"), maybe_triple_expr(), token_tws("}")).parse(i)?;
    let closed = if qualifiers.contains(&Qualifier::Closed) {
        Some(true)
    } else {
        None
    };
    let mut extra = Vec::new();
    let mut extends = Vec::new();
    for q in qualifiers {
        match q {
            Qualifier::Extends(label) => extends.push(label),
            Qualifier::Closed => {},
            Qualifier::Extra(ps) => extra.extend(ps),
        }
    }
    let maybe_extra = if extra.is_empty() { None } else { Some(extra) };
    let maybe_extends = if extends.is_empty() { None } else { Some(extends) };
    Ok((
        i,
        ShapeExpr::shape(Shape::new(closed, maybe_extra, maybe_triple_expr).with_extends(maybe_extends)),
    ))
}
/// Builds a parser that yields `Some(triple expression)` when one is present,
/// and otherwise consumes `tws0` and yields `None`.
fn maybe_triple_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Option<TripleExpr>> {
    traced("maybe_triple_expr", move |input| {
        let present = map(triple_expression(), Some);
        let absent = map(tws0, |_| None);
        alt((present, absent)).parse(input)
    })
}
/// Parses zero or more annotations.
fn annotations(i: Span) -> IRes<Vec<Annotation>> {
    let mut all_annotations = many0(annotation());
    all_annotations.parse(i)
}
/// Builds a parser for zero or more shape qualifiers.
fn qualifiers<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Vec<Qualifier>> {
    traced(
        "qualifiers",
        map_error(
            move |input| {
                let mut all_qualifiers = many0(qualifier());
                all_qualifiers.parse(input)
            },
            || ShExParseError::ExpectedQualifiers,
        ),
    )
}
/// Builds a parser for one shape qualifier: an extension, `CLOSED`, or an
/// `EXTRA` property set, tried in that order.
fn qualifier<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
    traced(
        "qualifier",
        map_error(
            move |input| {
                let mut any_qualifier = alt((extension(), closed(), extra_property_set()));
                any_qualifier.parse(input)
            },
            || ShExParseError::ExpectedQualifier,
        ),
    )
}
/// Builds a parser for an extension qualifier: `extends` (case-insensitive)
/// or `&`, followed by a shape reference.
fn extension<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
    traced(
        "extension",
        map_error(
            move |input| {
                let keyword_form = (tag_no_case_tws("extends"), shape_ref);
                let ampersand_form = (token_tws("&"), shape_ref);
                alt((keyword_form, ampersand_form))
                    .parse(input)
                    .map(|(rest, (_, target))| (rest, Qualifier::Extends(target)))
            },
            || ShExParseError::Extension,
        ),
    )
}
fn closed<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
traced(
"Closed",
map_error(
move |i| {
let (i, _) = token_tws("CLOSED")(i)?;
Ok((i, Qualifier::Closed))
},
|| ShExParseError::ExpectedClosed,
),
)
}
fn extra_property_set<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
traced(
"extra_property_set",
map_error(
move |i| {
let (i, (_, ps)) = (token_tws("EXTRA"), cut(many1((predicate, tws0)))).parse(i)?;
let ps = ps.into_iter().map(|(p, _)| p).collect();
Ok((i, Qualifier::Extra(ps)))
},
|| ShExParseError::ExpectedEXTRAPropertySet,
),
)
}
/// Builds the parser for a triple expression; the entry point is `one_of_triple_expr`.
fn triple_expression<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    let parser = map_error(
        move |input| one_of_triple_expr()(input),
        || ShExParseError::TripleExpression,
    );
    traced("triple_expression", parser)
}
/// Builds a parser that first tries a multi-element `|` alternation and
/// falls back to a single group triple expression.
fn one_of_triple_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced(
        "one_of_triple_expr",
        map_error(
            move |input| {
                let mut choice = alt((multi_element_one_of(), group_triple_expr()));
                choice.parse(input)
            },
            || ShExParseError::OneOfTripleExpr,
        ),
    )
}
/// Builds a parser for two or more group triple expressions separated by `|`,
/// combined with `TripleExpr::one_of`.
fn multi_element_one_of<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced("multi_element_one_of", move |input| {
        let (rest, (first, _, others)) = (group_triple_expr(), tws0, rest_group_triple_expr).parse(input)?;
        let mut alternatives = vec![first];
        alternatives.extend(others);
        Ok((rest, TripleExpr::one_of(alternatives)))
    })
}
/// Parses one or more `| <group triple expression>` continuations and returns
/// the expressions without their separators.
fn rest_group_triple_expr(i: Span) -> IRes<Vec<TripleExpr>> {
    let (i, pairs) = many1((token_tws("|"), group_triple_expr())).parse(i)?;
    let tes = pairs.into_iter().map(|(_bar, te)| te).collect();
    Ok((i, tes))
}
/// Builds a parser for a group triple expression: a multi-element `;` group
/// first, otherwise a single element.
fn group_triple_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced("group_triple_expr", move |input| {
        let mut group = alt((multi_element_group, single_element_group));
        group.parse(input)
    })
}
/// Parses one unary triple expression with an optional trailing `;`.
fn single_element_group(i: Span) -> IRes<TripleExpr> {
    (unary_triple_expr(), tws0, opt(char(';')))
        .parse(i)
        .map(|(rest, (te, _ws, _semicolon))| (rest, te))
}
/// Parses two or more unary triple expressions separated by `;` (with an
/// optional trailing `;`), combined with `TripleExpr::each_of`.
fn multi_element_group(i: Span) -> IRes<TripleExpr> {
    let mut group = (unary_triple_expr(), tws0, rest_unary_triple_expr, tws0, opt(char(';')));
    let (i, (first, _, others, _, _)) = group.parse(i)?;
    let mut members = vec![first];
    members.extend(others);
    Ok((i, TripleExpr::each_of(members)))
}
/// Parses one or more `; <unary triple expression>` continuations and returns
/// the expressions without their separators.
fn rest_unary_triple_expr(i: Span) -> IRes<Vec<TripleExpr>> {
    let (i, pairs) = many1((token_tws(";"), unary_triple_expr())).parse(i)?;
    let tes = pairs.into_iter().map(|(_semicolon, te)| te).collect();
    Ok((i, tes))
}
/// Builds a parser for a unary triple expression: an include (`&label`), or
/// an optionally labelled bracketed expression / triple constraint.
fn unary_triple_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced(
        "unary_triple_expr",
        map_error(
            move |input| {
                let mut unary = alt((include_(), unary_triple_expr_opt1));
                unary.parse(input)
            },
            || ShExParseError::UnaryTripleExpr,
        ),
    )
}
/// Parses an optional `$label` followed by a bracketed triple expression or a
/// triple constraint, and attaches the label through `with_id`.
fn unary_triple_expr_opt1(i: Span) -> IRes<TripleExpr> {
    let body = alt((bracketed_triple_expr(), triple_constraint()));
    (triple_expr_label_opt, tws0, body)
        .parse(i)
        .map(|(rest, (label, _, te))| (rest, te.with_id(label)))
}
fn triple_expr_label_opt(i: Span) -> IRes<Option<TripleExprLabel>> {
let (i, maybe_ts) = opt((char('$'), tws0, triple_expr_label)).parse(i)?;
let maybe_label = maybe_ts.map(|(_, _, r)| r);
Ok((i, maybe_label))
}
/// Builds the parser for a parenthesised triple expression with optional
/// cardinality, annotations and semantic actions.
///
/// After the opening `(`, the inner expression, the closing `)` and the
/// optional cardinality are parsed under `cut`. Cardinality bounds and
/// annotations are only applied when present; semantic actions are always
/// passed to `with_sem_acts`.
fn bracketed_triple_expr<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced(
        "bracketed_triple_expr",
        map_error(
            move |input| {
                let mut bracketed = (
                    token_tws("("),
                    cut(triple_expression()),
                    cut(token_tws(")")),
                    cut(opt(cardinality())),
                    tws0,
                    annotations,
                    tws0,
                    semantic_actions,
                );
                let (rest, (_, inner, _, maybe_card, _, annots, _, sem_acts)) = bracketed.parse(input)?;
                let bounded = match maybe_card {
                    Some(card) => inner.with_min(card.min()).with_max(card.max()),
                    None => inner,
                };
                let annotated = if annots.is_empty() {
                    bounded
                } else {
                    bounded.with_annotations(Some(annots))
                };
                Ok((rest, annotated.with_sem_acts(sem_acts)))
            },
            || ShExParseError::BracketedTripleExpr,
        ),
    )
}
/// Builds the parser for a triple constraint:
/// optional sense flags, a predicate, an inline shape expression, and then
/// optional cardinality, annotations and semantic actions.
///
/// A value expression equal to `ShapeExpr::any()` is stored as `None`.
fn triple_constraint<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
    traced(
        "triple_constraint",
        map_error(
            move |input| {
                let mut constraint = (
                    opt(sense_flags),
                    tws0,
                    predicate,
                    tws0,
                    inline_shape_expression(),
                    tws0,
                    opt(cardinality()),
                    tws0,
                    annotations,
                    tws0,
                    semantic_actions,
                );
                let (rest, (flags, _, pred, _, se, _, maybe_card, _, annots, _, sem_acts)) =
                    constraint.parse(input)?;

                let (min, max) = maybe_card.map_or((None, None), |card| (card.min(), card.max()));
                let value_expr = if se == ShapeExpr::any() { None } else { Some(se) };
                let (is_negated, is_inverse) = flags.map_or((None, None), |sf| sf.extract());

                let te = TripleExpr::triple_constraint(is_negated, is_inverse, pred, value_expr, min, max)
                    .with_sem_acts(sem_acts);
                let te = if annots.is_empty() {
                    te
                } else {
                    te.with_annotations(Some(annots))
                };
                Ok((rest, te))
            },
            || ShExParseError::ExpectedTripleConstraint,
        ),
    )
}
/// Parses sense flags in either order: `!` optionally followed by `^`, or
/// `^` optionally followed by `!`.
fn sense_flags(i: Span) -> IRes<SenseFlags> {
    let mut flags = alt((sense_flags_negated, sense_flags_inverse));
    flags.parse(i)
}
/// Parses the `!` token.
fn negated(i: Span) -> IRes<Span> {
    let mut bang = token_tws("!");
    bang(i)
}
/// Parses the `^` token.
fn inverse(i: Span) -> IRes<Span> {
    let mut caret = token_tws("^");
    caret(i)
}
/// Parses `!` optionally followed by `^`.
///
/// `negated` is always `Some(true)`; `inverse` is `Some(true)` only when `^` was present.
fn sense_flags_negated(i: Span) -> IRes<SenseFlags> {
    let (i, (_bang, caret)) = (negated, opt(inverse)).parse(i)?;
    let flags = SenseFlags {
        negated: Some(true),
        inverse: caret.and(Some(true)),
    };
    Ok((i, flags))
}
/// Parses `^` optionally followed by `!`.
///
/// `inverse` is always `Some(true)`; `negated` is `Some(true)` only when `!` was present.
fn sense_flags_inverse(i: Span) -> IRes<SenseFlags> {
    let (i, (_caret, bang)) = (inverse, opt(negated)).parse(i)?;
    let flags = SenseFlags {
        inverse: Some(true),
        negated: bang.and(Some(true)),
    };
    Ok((i, flags))
}
/// Builds a parser for a cardinality: `+`, `*`, `?` or a repeat range, tried in that order.
fn cardinality<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Cardinality> {
    traced(
        "cardinality",
        map_error(
            move |input| {
                let mut card = alt((plus, star, optional, repeat_range()));
                card.parse(input)
            },
            || ShExParseError::ExpectedCardinality,
        ),
    )
}
/// Parses `+` as `Cardinality::plus()`.
fn plus(i: Span) -> IRes<Cardinality> {
    char('+')(i).map(|(rest, _)| (rest, Cardinality::plus()))
}
/// Parses `*` as `Cardinality::star()`.
fn star(i: Span) -> IRes<Cardinality> {
    char('*')(i).map(|(rest, _)| (rest, Cardinality::star()))
}
/// Parses `?` as `Cardinality::optional()`.
fn optional(i: Span) -> IRes<Cardinality> {
    char('?')(i).map(|(rest, _)| (rest, Cardinality::optional()))
}
fn value_set<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NodeConstraint> {
traced(
"value set",
map_error(
move |i| {
let (i, (_, vs, _)) = (token_tws("["), many0(value_set_value()), token_tws("]")).parse(i)?;
Ok((i, NodeConstraint::new().with_values(vs)))
},
|| ShExParseError::ValueSet,
),
)
}
/// Builds a parser for one value set value: a wildcard with exclusions, an
/// IRI range, a literal range or a language range, tried in that order.
fn value_set_value<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
    traced(
        "value_set_value",
        map_error(
            move |input| {
                let mut value = alt((exclusion_plus(), iri_range, literal_range(), language_range()));
                value.parse(input)
            },
            || ShExParseError::ValueSetValue,
        ),
    )
}
fn exclusion_plus<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
traced(
"wildcard exclusion",
map_error(
move |i| {
let (i, (_, e)) = (
token_tws("."),
alt((
map(many1(literal_exclusion), |es| ValueSetValue::LiteralStemRange {
stem: StringOrWildcard::Wildcard,
exclusions: Some(es),
}),
map(many1(language_exclusion), |es| ValueSetValue::LanguageStemRange {
stem: LangOrWildcard::Wildcard,
exclusions: Some(es),
}),
map(many1(iri_exclusion), |es| ValueSetValue::IriStemRange {
stem: IriRefOrWildcard::Wildcard,
exclusions: Some(es),
}),
)),
)
.parse(i)?;
Ok((i, e))
},
|| ShExParseError::ExclusionPlus,
),
)
}
/// Parses an IRI optionally followed by `~` and exclusions.
///
/// No `~` yields a plain IRI value; `~` with no exclusions yields an IRI
/// stem; `~` with exclusions yields an IRI stem range.
fn iri_range(i: Span) -> IRes<ValueSetValue> {
    let (i, (iri, _, maybe_stem)) = (iri, tws0, opt(tilde_iri_exclusion)).parse(i)?;
    let value = match maybe_stem {
        None => ValueSetValue::iri(iri),
        Some(excs) if excs.is_empty() => ValueSetValue::IriStem { stem: iri },
        Some(excs) => ValueSetValue::IriStemRange {
            stem: IriRefOrWildcard::IriRef(iri),
            exclusions: Some(excs),
        },
    };
    Ok((i, value))
}
fn tilde_iri_exclusion(i: Span) -> IRes<Vec<IriExclusion>> {
let (i, (_, _, es)) = (char('~'), tws0, many0(iri_exclusion)).parse(i)?;
Ok((i, es))
}
fn tilde_literal_exclusion(i: Span) -> IRes<Vec<LiteralExclusion>> {
let (i, (_, es)) = (token_tws("~"), many0(literal_exclusion)).parse(i)?;
Ok((i, es))
}
/// Parses `- <iri>` with an optional trailing `~`.
///
/// A trailing `~` makes the exclusion an IRI stem; otherwise it is a plain IRI.
fn iri_exclusion(i: Span) -> IRes<IriExclusion> {
    let mut exclusion = (token_tws("-"), iri, tws0, opt(token_tws("~")));
    let (i, (_, excluded, _, stem_marker)) = exclusion.parse(i)?;
    let iri_exc = if stem_marker.is_some() {
        IriExclusion::IriStem(excluded)
    } else {
        IriExclusion::Iri(excluded)
    };
    Ok((i, iri_exc))
}
/// Builds a parser for a literal optionally followed by `~` and exclusions.
///
/// No `~` yields the literal itself; `~` with no exclusions yields a literal
/// stem on the lexical form; `~` with exclusions yields a stem range.
fn literal_range<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
    traced(
        "literal_range",
        map_error(
            move |input| {
                let mut range = (literal(), tws0, opt(tilde_literal_exclusion));
                let (rest, (lit, _, maybe_exc)) = range.parse(input)?;
                let vs = match maybe_exc {
                    None => ValueSetValue::ObjectValue(ObjectValue::Literal(lit)),
                    Some(excs) if excs.is_empty() => ValueSetValue::literal_stem(lit.lexical_form()),
                    Some(excs) => ValueSetValue::string_stem_range(lit.lexical_form(), excs),
                };
                Ok((rest, vs))
            },
            || ShExParseError::ExpectedLiteralRange,
        ),
    )
}
/// Builds a parser for the `~` token.
fn tilde<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Span<'a>> {
    move |input| {
        let mut tilde_token = token_tws("~");
        tilde_token(input)
    }
}
/// Builds a parser for the `-` token.
fn dash<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Span<'a>> {
    move |input| {
        let mut dash_token = token_tws("-");
        dash_token(input)
    }
}
/// Parses `- <literal>` with an optional trailing `~`.
///
/// A trailing `~` makes the exclusion a literal stem; either way only the
/// literal's lexical form is kept.
fn literal_exclusion(i: Span) -> IRes<LiteralExclusion> {
    let (i, (_, lit, stem_marker)) = (dash(), literal(), opt(tilde())).parse(i)?;
    let lexical = lit.lexical_form();
    let le = if stem_marker.is_some() {
        LiteralExclusion::LiteralStem(lexical)
    } else {
        LiteralExclusion::Literal(lexical)
    };
    Ok((i, le))
}
/// Builds a parser for a language range: a language tag form first, then the
/// `@~` wildcard form.
fn language_range<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
    traced(
        "language_range",
        map_error(
            move |input| {
                let mut range = alt((language_range1(), language_range2()));
                range.parse(input)
            },
            || ShExParseError::LanguageRange,
        ),
    )
}
fn language_range1<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
traced(
"language_range1",
map_error(
move |i| {
let (i, (lang_tag, _, maybe_stem_exclusions)) =
(lang_tag, tws0, opt((token_tws("~"), language_exclusions))).parse(i)?;
let value: ValueSetValue = match maybe_stem_exclusions {
None => ValueSetValue::language(lang_tag),
Some((_, exclusions)) => {
if exclusions.is_empty() {
ValueSetValue::language_stem(lang_tag)
} else {
ValueSetValue::LanguageStemRange {
stem: LangOrWildcard::Lang(lang_tag),
exclusions: Some(exclusions),
}
}
},
};
Ok((i, value))
},
|| ShExParseError::LanguageRange,
),
)
}
fn language_range2<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
traced(
"language_range2",
map_error(
move |i| {
let (i, (_, _, exclusions)) = (token_tws("@"), token_tws("~"), language_exclusions).parse(i)?;
let v = if exclusions.is_empty() {
ValueSetValue::LanguageStem {
stem: LangOrWildcard::wildcard(),
}
} else {
ValueSetValue::LanguageStemRange {
stem: LangOrWildcard::wildcard(),
exclusions: Some(exclusions),
}
};
Ok((i, v))
},
|| ShExParseError::LanguageRange,
),
)
}
/// Parses zero or more language exclusions.
fn language_exclusions(i: Span) -> IRes<Vec<LanguageExclusion>> {
    let mut exclusions = many0(language_exclusion);
    exclusions.parse(i)
}
/// Parses `- <language tag>` with an optional trailing `~`.
///
/// A trailing `~` makes the exclusion a language stem; otherwise it is a plain language.
fn language_exclusion(i: Span) -> IRes<LanguageExclusion> {
    let mut exclusion = (token_tws("-"), lang_tag, tws0, opt(token_tws("~")));
    let (i, (_, lang, _, stem_marker)) = exclusion.parse(i)?;
    let lang_exc = if stem_marker.is_some() {
        LanguageExclusion::LanguageStem(lang)
    } else {
        LanguageExclusion::Language(lang)
    };
    Ok((i, lang_exc))
}
fn include_<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
traced(
"include",
map_error(
move |i| {
let (i, (_, tel)) = (token_tws("&"), cut(triple_expr_label)).parse(i)?;
Ok((i, TripleExpr::Ref(tel)))
},
|| ShExParseError::Include,
),
)
}
/// Builds a parser for an annotation: `//` followed by a predicate and an
/// IRI or literal, both mandatory (`cut`) once `//` has matched.
fn annotation<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Annotation> {
    traced(
        "annotation",
        map_error(
            move |input| {
                let mut parser = (token_tws("//"), cut(predicate), tws0, cut(iri_or_literal()));
                parser
                    .parse(input)
                    .map(|(rest, (_, pred, _, object))| (rest, Annotation::new(pred, object)))
            },
            || ShExParseError::ExpectedAnnotation,
        ),
    )
}
/// Builds a parser for an object value: an IRI first, otherwise a literal.
fn iri_or_literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ObjectValue> {
    traced(
        "iri_or_literal",
        map_error(
            move |input| {
                let as_iri = map(iri, ObjectValue::iri_ref);
                let as_literal = map(literal(), ObjectValue::Literal);
                alt((as_iri, as_literal)).parse(input)
            },
            || ShExParseError::ExpectedIriOrLiteral,
        ),
    )
}
/// Parses zero or more code declarations.
///
/// Returns `None` when there are none, otherwise `Some` with the list.
fn semantic_actions(i: Span) -> IRes<Option<Vec<SemAct>>> {
    let (i, sas) = many0(code_decl()).parse(i)?;
    let maybe_sas = (!sas.is_empty()).then_some(sas);
    Ok((i, maybe_sas))
}
/// Builds a parser for a code declaration: `%`, an IRI, then either code or
/// a closing `%`. The IRI and the code part are mandatory (`cut`) after the
/// leading `%`.
fn code_decl<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, SemAct> {
    traced(
        "code_decl",
        map_error(
            move |input| {
                let mut decl = (char('%'), tws0, cut(iri), tws0, cut(code_or_percent), tws0);
                decl.parse(input)
                    .map(|(rest, (_, _, name, _, code, _))| (rest, SemAct::new(name, code)))
            },
            || ShExParseError::CodeDeclaration,
        ),
    )
}
/// Parses either a code block (via `code()`) or a bare `%`, which yields `None`.
fn code_or_percent(i: Span) -> IRes<Option<String>> {
    let mut choice = alt((code(), percent_code));
    choice.parse(i)
}
/// Parses a bare `%`, meaning a semantic action with no code (`None`).
fn percent_code(i: Span) -> IRes<Option<String>> {
    char('%')(i).map(|(rest, _)| (rest, None))
}
/// Builds a parser for a literal: an RDF literal, a numeric literal or a
/// boolean literal, tried in that order.
pub fn literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ConcreteLiteral> {
    traced(
        "literal",
        map_error(
            move |input| {
                let numeric = map(numeric_literal, ConcreteLiteral::NumericLiteral);
                alt((rdf_literal(), numeric, boolean_literal)).parse(input)
            },
            || ShExParseError::Literal,
        ),
    )
}
/// Parses a numeric literal, trying double, then decimal, then integer.
fn numeric_literal(i: Span) -> IRes<NumericLiteral> {
    let as_double = map(double, NumericLiteral::double);
    alt((as_double, decimal, integer_literal())).parse(i)
}
/// Builds a parser for the numeric literal that follows a numeric range keyword.
///
/// Alternatives, in order: a double (converted with
/// `NumericLiteral::decimal_from_f64`), a decimal, an integer literal.
fn raw_numeric_literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NumericLiteral> {
    map_error(
        move |i| {
            alt((
                |i| {
                    let (i, val) = double(i)?;
                    // A failed conversion becomes a recoverable parse error, so
                    // `alt` goes on to try the next alternative.
                    match NumericLiteral::decimal_from_f64(val) {
                        Ok(n) => Ok((i, n)),
                        Err(_) => Err(Err::Error(ShExParseError::NumericLiteral.at(i))),
                    }
                },
                decimal,
                integer_literal(),
            ))
            .parse(i)
        },
        || ShExParseError::NumericLiteral,
    )
}
/// Builds a parser that reads an integer and wraps it with
/// `NumericLiteral::integer_from_i128`.
fn integer_literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, NumericLiteral> {
    map_error(
        move |input| {
            let mut as_literal = map(integer(), NumericLiteral::integer_from_i128);
            as_literal.parse(input)
        },
        || ShExParseError::IntegerLiteral,
    )
}
/// Parses `true` / `false` into a boolean literal.
fn boolean_literal(i: Span) -> IRes<ConcreteLiteral> {
    let mut as_literal = map(boolean_value, ConcreteLiteral::boolean);
    as_literal.parse(i)
}
/// Parses the token `true` or `false` into the corresponding `bool`.
fn boolean_value(i: Span) -> IRes<bool> {
    let truthy = map(token_tws("true"), |_| true);
    let falsy = map(token_tws("false"), |_| false);
    alt((truthy, falsy)).parse(i)
}
/// Builds a parser for an RDF literal: a string optionally followed by a
/// language tag or by `^^` and a datatype IRI.
///
/// With neither suffix the result is a plain string literal.
fn rdf_literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ConcreteLiteral> {
    traced(
        "rdf_literal",
        map_error(
            move |input| {
                let (rest, lexical) = string()(input)?;
                let (rest, suffixed) = opt(alt((
                    map(lang_tag, |lang| ConcreteLiteral::lang_str(&lexical, lang)),
                    map(preceded(token("^^"), datatype_iri), |datatype| {
                        ConcreteLiteral::lit_datatype(&lexical, &datatype)
                    }),
                )))
                .parse(rest)?;
                let value = if let Some(v) = suffixed {
                    v
                } else {
                    ConcreteLiteral::str(&lexical)
                };
                Ok((rest, value))
            },
            || ShExParseError::RDFLiteral,
        ),
    )
}
/// Parses the datatype of a typed literal; this is simply an IRI.
fn datatype_iri(i: Span) -> IRes<IriRef> {
    let parsed = iri(i);
    parsed
}
/// Builds a parser for a string literal.
///
/// The long (triple-quoted) forms are tried before the short ones, so the
/// long delimiters get the first chance to match.
pub fn string<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, String> {
    traced(
        "string",
        map_error(
            move |input| {
                let mut any_string = alt((
                    string_literal_long1,
                    string_literal_long2,
                    string_literal1(),
                    string_literal2,
                ));
                any_string.parse(input)
            },
            || ShExParseError::ExpectedStringLiteral,
        ),
    )
}
/// Parses a double-quoted string.
///
/// The body is made of characters outside `REQUIRES_ESCAPE`, `echar` escapes
/// and `uchar` escapes. After the opening quote the body is parsed under `cut`.
fn string_literal2(i: Span) -> IRes<String> {
    let body = cut(many0(alt((none_of(REQUIRES_ESCAPE), echar, uchar))));
    let (i, chars) = delimited(token(r#"""#), body, token(r#"""#)).parse(i)?;
    Ok((i, chars.into_iter().collect::<String>()))
}
/// Builds a parser for a single-quoted string.
///
/// The body is made of characters accepted by `single_quote_char`, `echar`
/// escapes and `uchar` escapes.
fn string_literal1<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, String> {
    traced(
        "string_literal1",
        map_error(
            move |input| {
                let body = many0(alt((single_quote_char(), echar, uchar)));
                let (rest, chars) = delimited(token("'"), body, token("'")).parse(input)?;
                Ok((rest, chars.into_iter().collect::<String>()))
            },
            || ShExParseError::StringLiteralQuote,
        ),
    )
}
/// Builds a parser for one character that may appear unescaped inside a
/// single-quoted string (anything not in `REQUIRES_ESCAPE_SINGLE_QUOTE`).
fn single_quote_char<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, char> {
    traced("single_quote_char", move |input| {
        let mut plain_char = none_of(REQUIRES_ESCAPE_SINGLE_QUOTE);
        plain_char(input)
    })
}
/// Parses a `'''`-delimited long string.
///
/// The body accepts any character other than `'` and `\`, plus `echar` and
/// `uchar` escapes; an unescaped `'` is therefore not accepted inside the
/// body. After the opening delimiter the body is parsed under `cut`.
fn string_literal_long1(i: Span) -> IRes<String> {
    let body = cut(many0(alt((none_of(r"'\"), echar, uchar))));
    let (i, chars) = delimited(token("'''"), body, token("'''")).parse(i)?;
    Ok((i, chars.into_iter().collect::<String>()))
}
/// Parses a `"""`-delimited long string.
///
/// The body accepts any character other than `"` and `\`, plus `echar` and
/// `uchar` escapes; an unescaped `"` is therefore not accepted inside the
/// body. After the opening delimiter the body is parsed under `cut`.
fn string_literal_long2(i: Span) -> IRes<String> {
    let body = cut(many0(alt((none_of(r#""\"#), echar, uchar))));
    let (i, chars) = delimited(token(r#"""""#), body, token(r#"""""#)).parse(i)?;
    Ok((i, chars.into_iter().collect::<String>()))
}
/// Parses exactly one hexadecimal digit and returns the matched span.
pub fn hex(input: Span) -> IRes<Span> {
    let mut hex_digit = recognize(one_of(HEXDIGIT));
    hex_digit.parse(input)
}
/// Lazily compiled regex matching a single hexadecimal digit.
///
/// The pattern is not anchored, so a search may match at any offset of the input.
pub static HEX: &Lazy<Regex> = regex!("[0123456789ABCDEFabcdef]");
/// Regex-based variant of `hex`, implemented with `re_find` over `HEX`.
// NOTE(review): `HEX` is unanchored and `re_find` accepts a match at any
// offset, so this may skip leading non-hex characters — confirm intent.
pub fn hex_refactor(input: Span) -> IRes<Span> {
    let find_hex = re_find(HEX);
    find_hex(input)
}
use rudof_rdf::rdf_core::vocabs::RdfVocab;
pub fn re_find(re: &Lazy<Regex>) -> impl Fn(Span) -> IRes<Span> {
move |i| {
let str = i.fragment();
if let Some(m) = re.find(str) {
let (remaining, full_prefix) = i.take_split(m.end());
Ok((remaining, full_prefix.take_from(m.start())))
} else {
let e = ShExParseError::RegexFailed {
re: re.to_string(),
str: str.to_string(),
};
Err(Err::Error(e.at(i)))
}
}
}
/// Characters accepted by `hex` as a hexadecimal digit.
const HEXDIGIT: &str = "0123456789ABCDEFabcdef";
/// Characters that cannot appear unescaped in a `"`-delimited string:
/// `"`, `\`, line feed and carriage return.
const REQUIRES_ESCAPE: &str = "\u{22}\u{5C}\u{0A}\u{0D}";
/// Characters that cannot appear unescaped in a `'`-delimited string:
/// `'`, `\`, line feed and carriage return.
const REQUIRES_ESCAPE_SINGLE_QUOTE: &str = "\u{27}\u{5C}\u{0A}\u{0D}";
fn uchar(i: Span) -> IRes<char> {
let (i, str) = recognize(alt((
preceded(token(r"\u"), count(hex, 4)),
preceded(token(r"\U"), count(hex, 8)),
)))
.parse(i)?;
let c = unescape_uchar(str.fragment()).unwrap();
Ok((i, c))
}
/// Parses a backslash escape (`\t`, `\b`, `\n`, `\r`, `\f`, `\"`, `\'`, `\\`)
/// and returns the character it denotes.
fn echar(i: Span) -> IRes<char> {
    let (rest, escaped) = preceded(token(r"\"), one_of(r#"tbnrf"'\"#)).parse(i)?;
    let unescaped = match escaped {
        't' => '\t',
        'b' => '\u{0008}',
        'n' => '\n',
        'r' => '\r',
        'f' => '\u{000C}',
        // Quotes and the backslash stand for themselves.
        '"' | '\'' | '\\' => escaped,
        // `one_of` above only lets the characters handled here through.
        other => panic!("echar: unrecognized character: {other}"),
    };
    Ok((rest, unescaped))
}
fn lang_tag(i: Span) -> IRes<Lang> {
let (i, lang_str) = preceded(
token("@"),
recognize((alpha1, many0(preceded(token("-"), alphanumeric1)))),
)
.parse(i)?;
let lang = Lang::new(*lang_str.fragment()).map_err(|_| {
let e = ShExParseError::InvalidLangTag {
lang: lang_str.fragment().to_string(),
};
Err::Error(e.at(lang_str))
})?;
Ok((i, lang))
}
/// Parses a predicate: an IRI, or failing that the `a` keyword (see `rdf_type`).
fn predicate(i: Span) -> IRes<IriRef> {
    let mut iri_or_a = alt((iri, rdf_type));
    iri_or_a.parse(i)
}
fn datatype(i: Span) -> IRes<NodeConstraint> {
let (i, iri_ref) = iri(i)?;
Ok((i, NodeConstraint::new().with_datatype(iri_ref)))
}
/// Parses a shape expression label: an IRI, or failing that a blank node.
pub(crate) fn shape_expr_label(i: Span) -> IRes<ShapeExprLabel> {
    alt((iri_as_ref, blank_node_ref)).parse(i)
}
fn iri_as_ref(i: Span) -> IRes<ShapeExprLabel> {
let (i, iri_ref) = iri(i)?;
Ok((i, ShapeExprLabel::iri_ref(iri_ref)))
}
fn blank_node_ref(i: Span) -> IRes<ShapeExprLabel> {
let (i, bn) = blank_node(i)?;
Ok((i, ShapeExprLabel::bnode(bn)))
}
/// Parses a triple expression label: an IRI, or failing that a blank node.
fn triple_expr_label(i: Span) -> IRes<TripleExprLabel> {
    let from_iri = map(iri, |value| TripleExprLabel::IriRef { value });
    let from_bnode = map(blank_node, |value| TripleExprLabel::BNode { value });
    alt((from_iri, from_bnode)).parse(i)
}
fn code<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Option<String>> {
traced(
"code",
map_error(
move |i| {
let (i, (_, str, _, _, _)) =
(char('{'), cut(code_str), cut(char('%')), tws0, cut(char('}'))).parse(i)?;
Ok((i, Some(str)))
},
|| ShExParseError::Code,
),
)
}
/// Parses the text of a code block: plain characters (anything outside
/// `REQUIRES_ESCAPE_CODE`), `\%` / `\\` escapes, and `uchar` escapes.
fn code_str(i: Span) -> IRes<String> {
    let code_char = alt((none_of(REQUIRES_ESCAPE_CODE), escaped_code, uchar));
    let (rest, chars) = many0(code_char).parse(i)?;
    Ok((rest, chars.into_iter().collect()))
}
/// Characters that cannot appear unescaped inside a code block: `%` and `\`.
const REQUIRES_ESCAPE_CODE: &str = "%\u{5C}";
/// Parses `\%` or `\\` inside a code block and returns the escaped character.
fn escaped_code(i: Span) -> IRes<char> {
    preceded(token(r"\"), one_of(r#"%\"#)).parse(i)
}
fn repeat_range<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Cardinality> {
traced(
"repeat_range",
map_error(
move |i| {
let (i, (_, min, maybe_rest_range, _)) =
(token("{"), integer(), opt(rest_range()), cut(token("}"))).parse(i)?;
let cardinality = match maybe_rest_range {
None => Cardinality::exact(min as i32),
Some(maybe_max) => match maybe_max {
None => Cardinality::min_max(min as i32, -1),
Some(max) => Cardinality::min_max(min as i32, max),
},
};
Ok((i, cardinality))
},
|| ShExParseError::ExpectedRepeatRange,
),
)
}
fn rest_range<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Option<i32>> {
traced(
"rest_range",
map_error(
move |i| {
let (i, (_, maybe_max)) = (token_tws(","), opt(integer_or_star)).parse(i)?;
Ok((i, maybe_max))
},
|| ShExParseError::ExpectedRestRepeatRange,
),
)
}
fn integer_or_star(i: Span) -> IRes<i32> {
alt((map(integer(), |n| n as i32), map(token_tws("*"), |_| -1))).parse(i)
}
fn rdf_type(i: Span) -> IRes<IriRef> {
let (i, _) = tag("a")(i)?;
let rdf_type: IriRef = IriRef::iri(RdfVocab::rdf_type());
Ok((i, rdf_type))
}
fn at_pname_ns(i: Span) -> IRes<ShapeExprLabel> {
let (i, (_, _, pname)) = (char('@'), tws0, pname_ns_iri_ref).parse(i)?;
let label = ShapeExprLabel::iri_ref(pname);
Ok((i, label))
}
fn at_pname_ln(i: Span) -> IRes<ShapeExprLabel> {
let (i, (_, _, pname_ln)) = (char('@'), tws0, pname_ln).parse(i)?;
Ok((i, ShapeExprLabel::iri_ref(pname_ln)))
}
/// Parses a regular expression facet: `/pattern/` followed by optional flags.
///
/// The pattern text is passed through `unescape_pattern`; empty flags are
/// returned as `None`.
fn regexp(i: Span) -> IRes<Pattern> {
    let (rest, (_, raw, _, flag_span)) = (char('/'), pattern, cut(char('/')), flags).parse(i)?;
    let flags = match *flag_span.fragment() {
        "" => None,
        present => Some(present.to_string()),
    };
    Ok((
        rest,
        Pattern {
            str: unescape_pattern(&raw),
            flags,
        },
    ))
}
/// Resolves backslash escapes in the text of a regular expression pattern.
///
/// * `\` followed by one of `n r t \ | . ? * + ( ) { } $ - [ ] ^` is kept
///   verbatim (backslash included).
/// * `\uXXXX` and `\UXXXXXXXX` are replaced by the code point they denote.
/// * `\` followed by any other character yields just that character.
///
/// This function never panics. Input that used to panic is now kept
/// verbatim: a trailing `\`, and a `\u` / `\U` escape that is too short, not
/// hexadecimal, or not a valid Unicode scalar value (e.g. a surrogate).
/// Every input the previous implementation accepted gives the same output.
fn unescape_pattern(str: &str) -> String {
    /// Escapes that are passed through with their backslash.
    const KEPT_ESCAPES: [char; 18] = [
        'n', 'r', 't', '\\', '|', '.', '?', '*', '+', '(', ')', '{', '}', '$', '-', '[', ']', '^',
    ];

    /// Decodes exactly `len` hexadecimal digits into a character, if valid.
    fn decode(digits: &str, len: usize) -> Option<char> {
        if digits.chars().count() != len {
            return None;
        }
        u32::from_str_radix(digits, 16).ok().and_then(char::from_u32)
    }

    let mut out = String::with_capacity(str.len());
    let mut chars = str.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some(kept) if KEPT_ESCAPES.contains(&kept) => {
                out.push('\\');
                out.push(kept);
            },
            Some(marker @ ('u' | 'U')) => {
                let len = if marker == 'u' { 4 } else { 8 };
                let digits: String = chars.by_ref().take(len).collect();
                match decode(&digits, len) {
                    Some(decoded) => out.push(decoded),
                    // Malformed escape: keep the original text.
                    None => {
                        out.push('\\');
                        out.push(marker);
                        out.push_str(&digits);
                    },
                }
            },
            Some(other) => out.push(other),
            // Lone trailing backslash: keep it.
            None => out.push('\\'),
        }
    }
    out
}
/// Parses the text between the slashes of a regular expression (at least one
/// element).
///
/// Each element is a plain character outside `REQUIRES_ESCAPE_PATTERN`, a
/// backslash escape (kept as backslash plus character), or a decoded `uchar`.
fn pattern(i: Span) -> IRes<String> {
    let piece = alt((
        map(none_of(REQUIRES_ESCAPE_PATTERN), |c| vec![c]),
        escaped_pattern,
        map(uchar, |c| vec![c]),
    ));
    let (rest, pieces) = many1(piece).parse(i)?;
    Ok((rest, pieces.into_iter().flatten().collect()))
}
fn escaped_pattern(i: Span) -> IRes<Vec<char>> {
let (i, c) = preceded(token(r"\"), one_of(r#"nrt\|.?*+(){}$-[]^/"#)).parse(i)?;
Ok((i, vec!['\\', c]))
}
/// Characters that cannot appear unescaped inside a pattern:
/// `/`, `\`, line feed and carriage return.
const REQUIRES_ESCAPE_PATTERN: &str = "\u{2F}\u{5C}\u{0A}\u{0D}";
fn flags(i: Span) -> IRes<Span> {
recognize(many0(alt((char('s'), char('m'), char('i'), char('x'))))).parse(i)
}
/// Parses an IRI: either `<...>` or, failing that, a prefixed name.
pub(crate) fn iri(i: Span) -> IRes<IriRef> {
    let mut bracketed_or_prefixed = alt((iri_ref_s, prefixed_name()));
    bracketed_or_prefixed.parse(i)
}
/// Parses a `<...>` IRI and converts the resulting `IriS` into an `IriRef`.
fn iri_ref_s(i: Span) -> IRes<IriRef> {
    let (rest, parsed) = iri_ref(i)?;
    let as_ref: IriRef = parsed.into();
    Ok((rest, as_ref))
}
/// Builds a parser for a prefixed name: `prefix:local` is tried first, then
/// the bare `prefix:` form.
///
/// Failures are reported as `ShExParseError::ExpectedPrefixedName`.
fn prefixed_name<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, IriRef> {
    traced(
        "prefixed_name",
        map_error(
            move |i| alt((pname_ln, pname_ns_iri_ref)).parse(i),
            || ShExParseError::ExpectedPrefixedName,
        ),
    )
}
/// Parses a bare prefix (`prefix:`) into a prefixed `IriRef` whose local
/// part is empty.
fn pname_ns_iri_ref(i: Span) -> IRes<IriRef> {
    let (rest, namespace) = pname_ns(i)?;
    let prefixed = IriRef::prefixed(*namespace.fragment(), "");
    Ok((rest, prefixed))
}
fn blank_node(i: Span) -> IRes<BNode> {
map(blank_node_label, BNode::new).parse(i)
}
fn blank_node_label(i: Span<'_>) -> IRes<'_, &str> {
let (i, _) = tag("_:")(i)?;
let (i, label) = recognize((one_if(is_pn_chars_u_digit), blank_node_label2)).parse(i)?;
Ok((i, label.fragment()))
}
fn is_pn_chars_u_digit(c: char) -> bool {
is_digit(c) || is_pn_chars_u(c)
}
/// True for `.` or any character accepted by `is_pn_chars`.
fn is_pn_chars_or_dot(c: char) -> bool {
    matches!(c, '.') || is_pn_chars(c)
}
/// Consumes the remainder of a blank node label (after its first character).
///
/// The label grammar allows `.` inside a label but not as its last
/// character, so trailing dots matched by `blank_node_label3` are given back
/// to the input. Previously they were consumed as part of the label and only
/// logged.
fn blank_node_label2(src: Span) -> IRes<()> {
    let (rest, matched) = blank_node_label3(src)?;
    let text: &str = matched.fragment();
    let kept = text.trim_end_matches('.').len();
    if kept == text.len() {
        return Ok((rest, ()));
    }
    // Re-split the original input so the trailing dots stay unconsumed.
    let (rest, _consumed) = src.take_split(kept);
    Ok((rest, ()))
}
/// Takes the longest (possibly empty) run of characters accepted by
/// `is_pn_chars_or_dot`.
fn blank_node_label3(i: Span) -> IRes<Span> {
    take_while(is_pn_chars_or_dot).parse(i)
}
/// Builds a parser for an optionally signed integer, returned as `i128`.
///
/// Failures are reported as `ShExParseError::Integer`.
fn integer<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, i128> {
    map_error(
        move |i| {
            let (rest, (sign, magnitude)) = (opt(one_of("+-")), digits).parse(i)?;
            let value = match sign {
                Some('-') => -magnitude,
                None | Some('+') => magnitude,
                // `one_of("+-")` cannot produce any other character.
                _ => panic!("Internal parser error, Strange maybe_sign: {sign:?}"),
            };
            Ok((rest, value))
        },
        || ShExParseError::Integer,
    )
}
/// Parses a decimal literal: an optionally signed whole part (digits may be
/// absent), a `.`, and at least one fraction digit.
///
/// The whole part is converted with `i64` parsing and the fraction with
/// `u32` parsing; a failed conversion becomes `RDFError::ConversionError`.
///
/// NOTE(review): an empty or sign-only whole part (e.g. `.5`) fails the
/// `i64` conversion, and parsing the fraction as a number drops leading
/// zeros (`05` becomes `5`) — confirm against `decimal_from_parts`.
fn decimal(i: Span) -> IRes<NumericLiteral> {
    let whole_part = recognize(preceded(opt(sign), digit0));
    let fraction_part = preceded(token("."), digit1);
    map_res(pair(whole_part, fraction_part), |(whole, fraction)| {
        let whole_value = match whole.fragment().parse::<i64>() {
            Ok(value) => value,
            Err(e) => return Err(RDFError::ConversionError { msg: e.to_string() }),
        };
        let fraction_value = match fraction.fragment().parse::<u32>() {
            Ok(value) => value,
            Err(e) => return Err(RDFError::ConversionError { msg: e.to_string() }),
        };
        NumericLiteral::decimal_from_parts(whole_value, fraction_value)
    })
    .parse(i)
}
/// Parses a double literal with a mandatory exponent, in one of the forms
/// `1.5e3`, `.5e3` or `1e3`, optionally signed.
fn double(i: Span) -> IRes<f64> {
    let mantissa_and_exponent = alt((
        recognize((digit1, token("."), digit0, exponent)),
        recognize((token("."), digit1, exponent)),
        recognize(pair(digit1, exponent)),
    ));
    let lexical = recognize(preceded(opt(sign), mantissa_and_exponent));
    map_res(lexical, |value: LocatedSpan<&str>| value.parse()).parse(i)
}
/// Recognizes an exponent: `e` or `E`, an optional sign, and at least one digit.
fn exponent(input: Span) -> IRes<Span> {
    let parts = (one_of("eE"), opt(sign), digit1);
    recognize(parts).parse(input)
}
/// Recognizes a single `+` or `-`.
fn sign(input: Span) -> IRes<Span> {
    let plus_or_minus = one_of("+-");
    recognize(plus_or_minus).parse(input)
}
/// Parses one or more ASCII digits into an `i128`; fails if the value does
/// not fit.
fn digits(i: Span) -> IRes<i128> {
    map_res(digit1, |number: Span| number.fragment().parse::<i128>()).parse(i)
}
fn pname_ln(i: Span) -> IRes<IriRef> {
let (i, (prefix, local)) = (pname_ns, pn_local).parse(i)?;
Ok((i, IriRef::prefixed(*prefix.fragment(), local)))
}
/// Parses the local part of a prefixed name and returns its raw text
/// (escapes are not resolved).
fn pn_local(i: Span<'_>) -> IRes<'_, &str> {
    let first = alt((one_if(is_pn_local_start), plx));
    let (rest, cs) = recognize((first, pn_local2)).parse(i)?;
    Ok((rest, cs.fragment()))
}
fn is_pn_local_start(c: char) -> bool {
c == ':' || is_digit(c) || is_pn_chars_u(c)
}
/// Consumes the remainder of a local name (after its first element).
///
/// The local-name grammar allows `.` inside a name but not as its last
/// character, so trailing unescaped dots matched by `pn_local3` are given
/// back to the input. Previously they were consumed as part of the name and
/// only logged. A dot that belongs to a `\.` escape is kept.
fn pn_local2(src: Span) -> IRes<()> {
    let (rest, matched) = pn_local3(src)?;
    let text: &str = matched.fragment();
    let mut kept = text.len();
    // Drop trailing dots, stopping at one that is escaped by a backslash.
    while text[..kept].ends_with('.') && !text[..kept - 1].ends_with('\\') {
        kept -= 1;
    }
    if kept == text.len() {
        return Ok((rest, ()));
    }
    // Re-split the original input so the trailing dots stay unconsumed.
    let (rest, _consumed) = src.take_split(kept);
    Ok((rest, ()))
}
/// Recognizes any number of local-name elements: runs of name characters or
/// `:`, `plx` escapes, and dots.
fn pn_local3(i: Span) -> IRes<Span> {
    let element = alt((pn_chars_colon, plx, char_dot));
    recognize(many0(element)).parse(i)
}
/// Takes a non-empty run of characters accepted by `is_pn_chars_colon`.
fn pn_chars_colon(i: Span) -> IRes<Span> {
    take_while1(is_pn_chars_colon).parse(i)
}
/// True for `:` or any character accepted by `is_pn_chars`.
fn is_pn_chars_colon(c: char) -> bool {
    matches!(c, ':') || is_pn_chars(c)
}
/// Recognizes a percent-encoded byte (`%XX`) or, failing that, a local-name
/// backslash escape.
fn plx(i: Span) -> IRes<Span> {
    let mut percent_or_escape = alt((percent, pn_local_esc));
    percent_or_escape.parse(i)
}
/// Recognizes a backslash followed by one of the characters that may be
/// escaped inside a local name.
fn pn_local_esc(i: Span) -> IRes<Span> {
    let escapable = one_if(|c| "_~.-!$&'()*+,;=/?#@%".contains(c));
    recognize((char('\\'), escapable)).parse(i)
}
fn percent(i: Span) -> IRes<Span> {
recognize((char('%'), one_if(is_hex), one_if(is_hex))).parse(i)
}
/// True for an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Parses an IRI written between `<` and `>`.
///
/// The content is built by `iri_chars` and wrapped with
/// `IriS::new_unchecked`, i.e. without further validation here.
fn iri_ref(i: Span) -> IRes<IriS> {
    let (rest, text) = delimited(char('<'), iri_chars, char('>')).parse(i)?;
    let iri = IriS::new_unchecked(text.as_str());
    Ok((rest, iri))
}
fn iri_chars(i: Span) -> IRes<String> {
let (i, chars) = many0(iri_char).parse(i)?;
let s: String = chars.iter().collect();
Ok((i, s))
}
/// Parses one IRI character: a plain character allowed by `is_iri_ref`, or a
/// decoded `uchar` escape.
fn iri_char(i: Span) -> IRes<char> {
    alt((iri_chr, uchar)).parse(i)
}
#[derive(Error, Debug)]
enum UCharError {
#[error("Doesn't start by \\")]
NoStartByBackSlash,
#[error("unescape_code: \\u is not followed by 4 chars")]
LowercaseUNotFollowedBy4chars,
#[error("unescape code: \\U is not followed by 8 chars")]
UppercaseUNotFollowedBy8chars,
#[error("Unexpected {c} after \\")]
UnexpectedCharacterAfterBackSlash { c: char },
#[error("No character after \\")]
NoCharAfterBackSlash,
}
fn unescape_uchar(str: &str) -> Result<char, UCharError> {
let mut r: char = '?';
let mut queue: VecDeque<_> = str.chars().collect();
while let Some(c) = queue.pop_front() {
if c != '\\' {
return Err(UCharError::NoStartByBackSlash);
}
match queue.pop_front() {
Some('u') => {
let mut s = String::new();
for _ in 0..4 {
if let Some(c) = queue.pop_front() {
s.push(c)
} else {
return Err(UCharError::LowercaseUNotFollowedBy4chars);
}
}
let u = u32::from_str_radix(&s, 16).unwrap();
r = char::from_u32(u).unwrap();
},
Some('U') => {
let mut s = String::new();
for _ in 0..8 {
if let Some(c) = queue.pop_front() {
s.push(c)
} else {
return Err(UCharError::UppercaseUNotFollowedBy8chars);
}
}
let u = u32::from_str_radix(&s, 16).unwrap();
r = char::from_u32(u).unwrap();
},
Some(c) => return Err(UCharError::UnexpectedCharacterAfterBackSlash { c }),
None => return Err(UCharError::NoCharAfterBackSlash),
}
}
Ok(r)
}
/// Parses one character that may appear unescaped inside `<...>`.
fn iri_chr(i: Span) -> IRes<char> {
    satisfy(is_iri_ref).parse(i)
}
/// True when `chr` may appear unescaped inside `<...>`: it must be above the
/// space character and not one of ``< > " { } | ^ ` \``.
#[inline]
fn is_iri_ref(chr: char) -> bool {
    if chr <= ' ' {
        return false;
    }
    !matches!(chr, '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\')
}
/// Parses an optional prefix followed by `:` and returns the prefix text.
///
/// When the prefix is absent a fresh empty span is returned.
fn pname_ns(i: Span) -> IRes<Span> {
    let (rest, (maybe_prefix, _colon)) = (opt(pn_prefix), char(':')).parse(i)?;
    let prefix = match maybe_prefix {
        Some(prefix) => prefix,
        None => Span::from(""),
    };
    Ok((rest, prefix))
}
/// Recognizes a prefix: one base character, a run of name characters, then
/// any number of `.`-separated non-empty runs (so a prefix cannot end in `.`).
fn pn_prefix(i: Span) -> IRes<Span> {
    let first = satisfy(is_pn_chars_base);
    let more = take_while(is_pn_chars);
    recognize((first, more, rest_pn_chars)).parse(i)
}
fn rest_pn_chars(i: Span) -> IRes<Vec<Span>> {
let (i, vs) = fold_many0(
(char_dot, take_while1(is_pn_chars)),
Vec::new,
|mut cs: Vec<Span>, (c, rs)| {
cs.push(c);
cs.push(rs);
cs
},
)
.parse(i)?;
Ok((i, vs))
}
/// Recognizes a single `.`.
fn char_dot(i: Span) -> IRes<Span> {
    let dot = char('.');
    recognize(dot).parse(i)
}
/// True for a base name character: an ASCII letter or a code point in one of
/// the ranges listed below.
fn is_pn_chars_base(c: char) -> bool {
    matches!(
        u32::from(c),
        0x41..=0x5A
            | 0x61..=0x7A
            | 0x00C0..=0x00D6
            | 0x00D8..=0x00F6
            | 0x00F8..=0x02FF
            | 0x0370..=0x037D
            | 0x037F..=0x1FFF
            | 0x200C..=0x200D
            | 0x2070..=0x218F
            | 0x2C00..=0x2FEF
            | 0x3001..=0xD7FF
            | 0xF900..=0xFDCF
            | 0xFDF0..=0xFFFD
            | 0x10000..=0xEFFFF
    )
}
/// True for `_` or any character accepted by `is_pn_chars_base`.
fn is_pn_chars_u(c: char) -> bool {
    matches!(c, '_') || is_pn_chars_base(c)
}
fn is_pn_chars(c: char) -> bool {
is_pn_chars_u(c)
|| c == '-'
|| is_digit(c)
|| c == 0xB7 as char
|| in_range(c, 0x0300, 0x036F)
|| in_range(c, 0x203F, 0x2040)
}
/// True for an ASCII letter (`a-z` or `A-Z`).
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphabetic()
}
/// True for an ASCII digit (`0-9`).
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// True when the code point of `c` lies in `lower..=upper` (both inclusive).
fn in_range(c: char, lower: u32, upper: u32) -> bool {
    let code_point = u32::from(c);
    (lower..=upper).contains(&code_point)
}
fn one_if<'a, F: Fn(char) -> bool>(f: F) -> impl Fn(Span<'a>) -> IRes<'a, Span<'a>> {
move |i| {
if let Some(c) = i.chars().next() {
if f(c) {
Ok(i.take_split(1))
} else {
Err(Err::Error(error_position!(i, ErrorKind::OneOf)))
}
} else {
Err(Err::Error(error_position!(i, ErrorKind::OneOf)))
}
}
}
fn symbol(value: &str) -> impl FnMut(Span) -> IRes<()> {
move |i| {
let (i, (_, _, _)) = (tws0, tag_no_case(value), tws0).parse(i)?;
Ok((i, ()))
}
}
/// Parses one or more items separated by `sep` and combines them with `maker`.
///
/// Whitespace (`tws0`) is skipped before every item and every separator. The
/// first item is mandatory and its error is propagated. After that, the loop
/// stops as soon as the separator or the next item fails, and the items
/// collected so far are returned. If a separator matched but the following
/// item did not, the separator stays consumed.
fn many1_sep<'a, O, O2, F, G, H>(mut parser_many: F, mut sep: G, maker: H, mut i: Span<'a>) -> IRes<'a, O2>
where
    F: FnMut(Span<'a>) -> IRes<'a, O>,
    G: FnMut(Span<'a>) -> IRes<'a, ()>,
    H: Fn(Vec<O>) -> O2,
{
    /// Skips optional whitespace; returns the input unchanged if `tws0` fails.
    fn skip_ws<'s>(i: Span<'s>) -> Span<'s> {
        match tws0(i) {
            Ok((rest, _)) => rest,
            Err(_) => i,
        }
    }

    let (after_first, first) = parser_many(skip_ws(i))?;
    let mut items = vec![first];
    i = after_first;
    loop {
        i = skip_ws(i);
        let Ok((after_sep, _)) = sep(i) else { break };
        i = skip_ws(after_sep);
        let Ok((after_item, item)) = parser_many(i) else { break };
        items.push(item);
        i = after_item;
    }
    Ok((i, maker(items)))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A prefix declaration whose alias contains dots is parsed as one alias.
    #[test]
    fn test_prefix_id_with_dots() {
        let (_, statement) = shex_statement()(Span::new("prefix a.b.c: <urn>")).unwrap();
        let expected = ShExStatement::PrefixDecl {
            alias: "a.b.c",
            iri: IriS::new_unchecked("urn"),
        };
        assert_eq!(statement, expected);
    }

    /// `:S {}` declares a non-abstract shape with an empty shape expression.
    #[test]
    fn test_basic_shape_decl() {
        let (_, statement) = shex_statement()(Span::new(":S {}")).unwrap();
        let expected = ShExStatement::ShapeDecl {
            is_abstract: false,
            shape_label: ShapeExprLabel::prefixed("", "S"),
            shape_expr: ShapeExpr::empty_shape(),
        };
        assert_eq!(statement, expected);
    }

    /// Whitespace alone is not a statement.
    #[test]
    fn test_tws_statement() {
        let outcome = shex_statement()(Span::new(" "));
        assert!(outcome.is_err());
    }

    /// A single-quoted literal yields its content.
    #[test]
    fn test_string_literal() {
        let (_, parsed) = string_literal1()(Span::new("'a'")).unwrap();
        assert_eq!(parsed, "a".to_string());
    }

    /// A `\u` escape inside `<...>` is decoded.
    #[test]
    fn test_iri_ref_uchar() {
        let (_, parsed) = iri_ref(Span::new("<http://example.org/p\\u0031>")).unwrap();
        assert_eq!(parsed, IriS::new_unchecked("http://example.org/p1"));
    }

    /// A value set with one string literal.
    #[test]
    fn test_value_set() {
        let (_, parsed) = value_set()(Span::new("[ 'a' ]")).unwrap();
        let values = vec![ValueSetValue::string_literal("a", None)];
        assert_eq!(parsed, NodeConstraint::new().with_values(values));
    }

    /// The same value set parsed through the literal node constraint parser.
    #[test]
    fn test_node_constraint_value_set() {
        let (_, parsed) = lit_node_constraint()(Span::new("[ 'a' ]")).unwrap();
        let values = vec![ValueSetValue::string_literal("a", None)];
        assert_eq!(parsed, NodeConstraint::new().with_values(values));
    }

    /// The same value set wrapped as a shape expression.
    #[test]
    fn test_shape_atom_node_constraint() {
        let (_, parsed) = lit_node_constraint_shape_expr()(Span::new("[ 'a' ]")).unwrap();
        let values = vec![ValueSetValue::string_literal("a", None)];
        let expected = ShapeExpr::NodeConstraint(NodeConstraint::new().with_values(values));
        assert_eq!(parsed, expected);
    }

    /// `:p xsd:int` is a triple constraint whose value has a datatype constraint.
    #[test]
    fn test_triple_constraint() {
        let (_, parsed) = triple_constraint()(Span::new(":p xsd:int")).unwrap();
        let datatype = NodeConstraint::new().with_datatype(IriRef::prefixed("xsd", "int"));
        let value_expr = ShapeExpr::node_constraint(datatype);
        let expected =
            TripleExpr::triple_constraint(None, None, IriRef::prefixed("", "p"), Some(value_expr), None, None);
        assert_eq!(parsed, expected);
    }

    /// A bare prefixed name as an inline shape expression is a datatype constraint.
    #[test]
    fn test_inline_shape_expr() {
        let (_, parsed) = inline_shape_expression()(Span::new(":p")).unwrap();
        let datatype = NodeConstraint::new().with_datatype(IriRef::prefixed("", "p"));
        assert_eq!(parsed, ShapeExpr::node_constraint(datatype));
    }

    /// `0` is parsed as an integer literal.
    #[test]
    fn test_numeric_literal() {
        let (_, parsed) = numeric_literal(Span::new("0")).unwrap();
        assert_eq!(parsed, NumericLiteral::integer(0));
    }
}