use crate::{
Error,
compile::{Compiler, Inst},
re::{Captures, Haystack, RawCaptures, RegexEngine, Sliceable},
};
use std::{fmt, ops::Deref, sync::Arc};
mod extract;
mod guard;
mod narrow;
mod parallel;
pub(crate) use extract::Extract;
pub(crate) use guard::Guard;
pub(crate) use narrow::Narrow;
/// A compiled structex expression, generic over the regex engine `R`.
///
/// Both fields are reference counted: `raw` keeps the source text the value
/// was built from, and `inner` keeps the compiled program that is shared with
/// the iterators this type hands out.
#[derive(Clone)]
pub struct Structex<R: RegexEngine> {
    /// The expression text exactly as it was given to the builder.
    raw: Arc<str>,
    /// Compiled instructions together with their regexes, tags and actions.
    inner: Arc<Inner<R>>,
}
/// Debug output is a one-field tuple struct named `Structex` wrapping the
/// source expression; the compiled internals are deliberately left out.
impl<R: RegexEngine> fmt::Debug for Structex<R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("Structex");
        tuple.field(&self.raw);
        tuple.finish()
    }
}
/// Display output is the source expression wrapped as `Structex(<expr>)`.
impl<R: RegexEngine> fmt::Display for Structex<R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let expr: &str = &self.raw;
        f.write_fmt(format_args!("Structex({})", expr))
    }
}
impl<R> Structex<R>
where
R: RegexEngine,
{
pub fn new(se: &str) -> Result<Self, Error> {
StructexBuilder::new(se).build()
}
pub fn as_str(&self) -> &str {
&self.raw
}
pub fn actions(&self) -> &[Action] {
&self.inner.actions
}
pub fn tags(&self) -> &[char] {
&self.inner.tags
}
pub fn iter_tagged_captures<'s, 'h, H>(
&'s self,
haystack: &'h H,
) -> TaggedCapturesIter<'s, 'h, R, H>
where
H: Haystack<R> + ?Sized,
{
TaggedCapturesIter::new(
&self.inner.inst,
self.inner.clone(),
haystack,
Dot::Range {
from: 0,
to: haystack.max_len(),
},
)
}
pub fn iter_tagged_captures_between<'s, 'h, H>(
&'s self,
byte_from: usize,
byte_to: usize,
haystack: &'h H,
) -> TaggedCapturesIter<'s, 'h, R, H>
where
H: Haystack<R> + ?Sized,
{
TaggedCapturesIter::new(
&self.inner.inst,
self.inner.clone(),
haystack,
Dot::Range {
from: byte_from,
to: byte_to,
},
)
}
}
/// Shorthand for the callback type used to post-process action argument
/// strings: any `'static` function or closure from `String` to `String`.
trait ActionArgFn: Fn(String) -> String + 'static {}

// Blanket impl: every suitable function or closure is an `ActionArgFn`.
impl<F: Fn(String) -> String + 'static> ActionArgFn for F {}
/// Argument processor that hands the string back untouched.
fn raw_arg_string(s: String) -> String {
    std::convert::identity(s)
}
/// Argument processor that expands the two-character sequences `\n` and `\t`
/// (a backslash followed by `n` or `t`) into a real newline and tab.
///
/// The replacements are applied one after the other, newline first, and no
/// other escape sequences are interpreted.
fn newline_and_tab_string(s: String) -> String {
    // (escaped form as written, literal character it stands for)
    const ESCAPES: [(&str, &str); 2] = [("\\n", "\n"), ("\\t", "\t")];

    ESCAPES
        .iter()
        .fold(s, |text, &(escaped, literal)| text.replace(escaped, literal))
}
#[derive(Clone)]
pub struct StructexBuilder {
expr: String,
action_arg_fn: Arc<dyn ActionArgFn>,
require_actions: bool,
allow_top_level_actions: bool,
allowed_argless_tags: Option<String>,
allowed_single_arg_tags: Option<String>,
}
impl StructexBuilder {
pub fn new(expr: impl Into<String>) -> Self {
Self {
expr: expr.into(),
action_arg_fn: Arc::new(newline_and_tab_string),
require_actions: false,
allow_top_level_actions: false,
allowed_argless_tags: None,
allowed_single_arg_tags: None,
}
}
pub fn raw_arg_strings(mut self) -> Self {
self.action_arg_fn = Arc::new(raw_arg_string);
self
}
pub fn action_argument_fn<F>(mut self, f: F) -> Self
where
F: Fn(String) -> String + 'static,
{
self.action_arg_fn = Arc::new(f);
self
}
pub fn require_actions(mut self) -> Self {
self.require_actions = true;
self
}
pub fn allow_top_level_actions(mut self) -> Self {
self.allow_top_level_actions = true;
self
}
pub fn with_allowed_argless_tags(mut self, tags: impl Into<String>) -> Self {
self.allowed_argless_tags = Some(tags.into());
self
}
pub fn with_allowed_single_arg_tags(mut self, tags: impl Into<String>) -> Self {
self.allowed_single_arg_tags = Some(tags.into());
self
}
pub fn build<R>(self) -> Result<Structex<R>, Error>
where
R: RegexEngine,
{
let mut c = Compiler {
require_actions: self.require_actions,
allow_top_level_actions: self.allow_top_level_actions,
allowed_argless_tags: self.allowed_argless_tags,
allowed_single_arg_tags: self.allowed_single_arg_tags,
..Default::default()
};
let inst = c.compile(&self.expr)?;
let Compiler {
re, tags, actions, ..
} = c;
let actions: Box<[_]> = actions
.into_iter()
.enumerate()
.map(|(id, mut a)| Action {
id,
tag: a.tag,
arg: a.arg.take().map(|s| Arc::from((self.action_arg_fn)(s))),
})
.collect();
Ok(Structex {
raw: Arc::from(self.expr),
inner: Arc::new(Inner {
inst,
re: re
.into_iter()
.map(|re| R::compile(&re).map_err(|e| Error::Regex(Box::new(e))))
.collect::<Result<_, _>>()?,
tags: tags.into_boxed_slice(),
actions,
}),
})
}
}
/// The compiled state shared between a [`Structex`] and its iterators.
pub(super) struct Inner<R: RegexEngine> {
    /// Root instruction of the compiled expression.
    pub(super) inst: Inst,
    /// Regexes compiled with engine `R`, one per pattern the compiler collected.
    pub(super) re: Box<[R]>,
    /// Tag characters reported by the compiler.
    pub(super) tags: Box<[char]>,
    /// Actions in id order (an action's id is its index in this slice).
    pub(super) actions: Box<[Action]>,
}
/// The region of the haystack an instruction is applied to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(super) enum Dot {
    /// A plain span described by its `from` and `to` offsets.
    Range { from: usize, to: usize },
    /// A span that also carries the capture positions that produced it.
    Captures(RawCaptures),
}
impl Dot {
    /// Both ends of the region as a `(from, to)` pair; for the `Captures`
    /// variant this is whatever `RawCaptures::get_match` reports.
    pub fn loc(&self) -> (usize, usize) {
        match *self {
            Self::Captures(ref raw) => raw.get_match(),
            Self::Range { from, to } => (from, to),
        }
    }

    /// Start offset of the region.
    pub fn from(&self) -> usize {
        match *self {
            Self::Captures(ref raw) => raw.from(),
            Self::Range { from, .. } => from,
        }
    }

    /// End offset of the region.
    pub fn to(&self) -> usize {
        match *self {
            Self::Captures(ref raw) => raw.to(),
            Self::Range { to, .. } => to,
        }
    }

    /// Consumes the region and binds it to `haystack` as a [`Captures`].
    ///
    /// A `Range` becomes a capture list holding the single span
    /// `(from, to)`; a `Captures` hands over its stored positions.
    fn into_captures<'h, H: Sliceable + ?Sized>(self, haystack: &'h H) -> Captures<'h, H> {
        match self {
            Self::Captures(raw) => Captures::new(haystack, raw.caps),
            Self::Range { from, to } => {
                let whole_match = Some((from, to));
                Captures::new(haystack, vec![whole_match])
            }
        }
    }
}
/// One item yielded by a [`TaggedCapturesIter`]: a set of captures from the
/// haystack plus the [`Action`] attached to them, when there is one.
#[derive(Debug, Eq, PartialEq)]
pub struct TaggedCaptures<'h, H: Sliceable + ?Sized> {
    /// The captured positions, bound to the haystack they came from.
    pub captures: Captures<'h, H>,
    /// The action attached to this match, or `None` when the match was
    /// emitted without one.
    pub action: Option<Action>,
}
impl<'h, H: Sliceable + ?Sized> TaggedCaptures<'h, H> {
    /// The matched text, as returned by `Captures::match_text`.
    pub fn as_slice(&self) -> H::Slice<'_> {
        let caps = &self.captures;
        caps.match_text()
    }

    /// Id of the attached action, if there is one.
    pub fn id(&self) -> Option<usize> {
        let action = self.action.as_ref()?;
        Some(action.id)
    }

    /// Tag of the attached action, if there is one.
    pub fn tag(&self) -> Option<char> {
        let action = self.action.as_ref()?;
        Some(action.tag)
    }

    /// Argument of the attached action; `None` when there is no action or the
    /// action has no argument.
    pub fn arg(&self) -> Option<&str> {
        let action = self.action.as_ref()?;
        action.arg.as_deref()
    }

    /// Whether an action is attached to this match.
    pub fn has_action(&self) -> bool {
        matches!(self.action, Some(_))
    }
}
/// Dereferences to the wrapped [`Captures`], so its methods can be called
/// directly on a [`TaggedCaptures`].
impl<'h, H: Sliceable + ?Sized> Deref for TaggedCaptures<'h, H> {
    type Target = Captures<'h, H>;

    fn deref(&self) -> &Captures<'h, H> {
        let Self { captures, .. } = self;
        captures
    }
}
/// An action declared in a structex expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Action {
    /// Position of this action among all the actions of its expression.
    id: usize,
    /// The single character naming the action.
    tag: char,
    /// The action's argument text, if it has one; shared so clones are cheap.
    arg: Option<Arc<str>>,
}

impl Action {
    /// Position of this action among all the actions of its expression.
    pub fn id(&self) -> usize {
        let Self { id, .. } = *self;
        id
    }

    /// The character naming this action.
    pub fn tag(&self) -> char {
        let Self { tag, .. } = *self;
        tag
    }

    /// The argument text, or `None` for an action without an argument.
    pub fn arg(&self) -> Option<&str> {
        self.arg.as_ref().map(|text| &**text)
    }
}
/// Iterator over the [`TaggedCaptures`] produced by running a [`Structex`]
/// against a haystack.
pub struct TaggedCapturesIter<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> {
    /// The underlying iterator state; `None` means nothing will be yielded.
    inner: Option<MatchesInner<'s, 'h, R, H>>,
}
impl<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> TaggedCapturesIter<'s, 'h, R, H> {
    /// Builds the iterator for instruction `inst`, applied to the region
    /// `dot` of `haystack`.
    fn new(inst: &'s Inst, inner: Arc<Inner<R>>, haystack: &'h H, dot: Dot) -> Self {
        let state = MatchesInner::new(inst, inner, haystack, dot);

        Self { inner: state }
    }
}
impl<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> Iterator
    for TaggedCapturesIter<'s, 'h, R, H>
{
    type Item = TaggedCaptures<'h, H>;

    /// Yields the next item from the underlying state, or `None` straight
    /// away when there is no underlying state.
    fn next(&mut self) -> Option<Self::Item> {
        let state = self.inner.as_mut()?;
        state.next()
    }
}
/// The concrete iterator state behind a [`TaggedCapturesIter`].
enum MatchesInner<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> {
    /// Items come from an `extract` iterator.
    Extract(extract::Iter<'s, 'h, R, H>),
    /// Items come from a `parallel` iterator.
    Parallel(parallel::Iter<'s, 'h, R, H>),
    /// A single prepared item, yielded once and then replaced by `None`.
    Emit(Option<TaggedCaptures<'h, H>>),
}
impl<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> MatchesInner<'s, 'h, R, H> {
    /// Picks the iterator state for instruction `inst`, applied to the
    /// region `dot` of `haystack`.
    ///
    /// `Narrow` and `Guard` instructions decide the outcome themselves
    /// through their `apply` methods and are the only cases that can produce
    /// `None`; every other instruction yields `Some`.
    fn new(inst: &'s Inst, inner: Arc<Inner<R>>, haystack: &'h H, dot: Dot) -> Option<Self> {
        let state = match inst {
            // Delegating instructions: hand back their answer unchanged.
            Inst::Narrow(narrow) => return narrow.apply(haystack, dot, inner),
            Inst::Guard(guard) => return guard.apply(haystack, dot, inner),

            // Terminal instructions: one ready-made item, with or without an action.
            Inst::EmitMatch => {
                let captures = dot.into_captures(haystack);
                Self::Emit(Some(TaggedCaptures {
                    captures,
                    action: None,
                }))
            }
            Inst::Action(idx) => {
                let captures = dot.into_captures(haystack);
                let action = inner.actions[*idx].clone();
                Self::Emit(Some(TaggedCaptures {
                    captures,
                    action: Some(action),
                }))
            }

            // Instructions backed by their own sub-iterators.
            Inst::Extract(ext) => Self::Extract(extract::Iter::new(haystack, dot, ext, inner)),
            Inst::Parallel(branches) => {
                Self::Parallel(parallel::Iter::new(haystack, dot, branches, inner))
            }
        };

        Some(state)
    }
}
impl<'s, 'h, R: RegexEngine, H: Haystack<R> + ?Sized> Iterator for MatchesInner<'s, 'h, R, H> {
    type Item = TaggedCaptures<'h, H>;

    /// Forwards to the wrapped sub-iterator, or hands out the single
    /// prepared item of an `Emit` state exactly once.
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Emit(pending) => pending.take(),
            Self::Extract(extracting) => extracting.next(),
            Self::Parallel(branches) => branches.next(),
        }
    }
}