use std::collections::HashMap;
use crate::{
compile::{CompileResult, CompileState},
error::{CompileError, CompileErrorKind, ParseError},
options::{CompileOptions, ParseOptions, RegexFlavor},
regex::Regex,
rule::Rule,
span::Span,
};
#[derive(Clone)]
pub(crate) struct Group<'i> {
parts: Vec<Rule<'i>>,
capture: Option<Capture<'i>>,
pub(crate) span: Span,
}
impl<'i> Group<'i> {
pub(crate) fn new(parts: Vec<Rule<'i>>, capture: Option<Capture<'i>>, span: Span) -> Self {
Group { parts, capture, span }
}
pub(crate) fn set_capture(&mut self, capture: Capture<'i>) {
self.capture = Some(capture);
}
pub(crate) fn is_capturing(&self) -> bool {
self.capture.is_some()
}
pub(crate) fn get_capturing_groups(
&self,
count: &mut u32,
map: &mut HashMap<String, u32>,
within_variable: bool,
) -> Result<(), CompileError> {
match self.capture {
Some(Capture { name: Some(name) }) => {
if within_variable {
return Err(CompileErrorKind::CaptureInLet.at(self.span));
}
if map.contains_key(name) {
return Err(
CompileErrorKind::NameUsedMultipleTimes(name.to_string()).at(self.span)
);
}
*count += 1;
map.insert(name.to_string(), *count);
}
Some(Capture { name: None }) => {
if within_variable {
return Err(CompileErrorKind::CaptureInLet.at(self.span));
}
*count += 1;
}
None => {}
};
for rulex in &self.parts {
rulex.get_capturing_groups(count, map, within_variable)?;
}
Ok(())
}
pub(crate) fn compile<'c>(
&'c self,
options: CompileOptions,
state: &mut CompileState<'c, 'i>,
) -> CompileResult<'i> {
if self.capture.is_some() {
state.next_idx += 1;
}
Ok(Regex::Group(RegexGroup {
parts: self
.parts
.iter()
.map(|part| part.comp(options, state))
.collect::<Result<_, _>>()?,
capture: match self.capture {
Some(Capture { name: Some(name) }) => RegexCapture::NamedCapture(name),
Some(Capture { name: None }) => RegexCapture::Capture,
None => RegexCapture::None,
},
}))
}
pub(crate) fn validate(&self, options: &ParseOptions) -> Result<(), ParseError> {
for rule in &self.parts {
rule.validate(options)?;
}
Ok(())
}
}
#[cfg(feature = "dbg")]
impl core::fmt::Debug for Group<'_> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self.capture {
Some(Capture { name: Some(name) }) => write!(f, "Group :{name}")?,
Some(_) => write!(f, "Group :")?,
None => write!(f, "Group")?,
}
if self.parts.is_empty() {
write!(f, "()")
} else {
let mut tup = f.debug_tuple("");
let mut tup = &mut tup;
for part in &self.parts {
tup = tup.field(part);
}
tup.finish()
}
}
}
#[derive(Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "dbg", derive(Debug))]
pub(crate) struct Capture<'i> {
pub(crate) name: Option<&'i str>,
}
impl<'i> Capture<'i> {
pub(crate) fn new(name: Option<&'i str>) -> Self {
Capture { name }
}
}
#[cfg_attr(feature = "dbg", derive(Debug))]
pub(crate) struct RegexGroup<'i> {
parts: Vec<Regex<'i>>,
capture: RegexCapture<'i>,
}
#[cfg_attr(feature = "dbg", derive(Debug))]
pub(crate) enum RegexCapture<'i> {
Capture,
NamedCapture(&'i str),
None,
NoneWithParens,
}
impl<'i> RegexGroup<'i> {
pub(crate) fn new(parts: Vec<Regex<'i>>, capture: RegexCapture<'i>) -> Self {
Self { parts, capture }
}
pub(crate) fn codegen(&self, buf: &mut String, flavor: RegexFlavor) {
match self.capture {
RegexCapture::NamedCapture(name) => {
match flavor {
RegexFlavor::Python | RegexFlavor::Pcre | RegexFlavor::Rust => {
buf.push_str("(?P<");
}
RegexFlavor::DotNet
| RegexFlavor::Java
| RegexFlavor::Ruby
| RegexFlavor::JavaScript => {
buf.push_str("(?<");
}
}
buf.push_str(name);
buf.push('>');
for part in &self.parts {
part.codegen(buf, flavor);
}
buf.push(')');
}
RegexCapture::Capture => {
buf.push('(');
for part in &self.parts {
part.codegen(buf, flavor);
}
buf.push(')');
}
RegexCapture::None => {
for part in &self.parts {
let needs_parens = part.needs_parens_in_group();
if needs_parens {
buf.push_str("(?:");
}
part.codegen(buf, flavor);
if needs_parens {
buf.push(')');
}
}
}
RegexCapture::NoneWithParens => {
for part in &self.parts {
buf.push_str("(?:");
part.codegen(buf, flavor);
buf.push(')');
}
}
}
}
pub(crate) fn needs_parens_before_repetition(&self) -> bool {
match self.capture {
RegexCapture::None if self.parts.len() == 1 => {
self.parts[0].needs_parens_before_repetition()
}
RegexCapture::NoneWithParens => false,
_ => true,
}
}
}