use std::collections::HashMap;
use std::sync::LazyLock;
use proc_macro2::Span;
use regex_automata::dfa::regex::Regex;
use syn::Ident;
use crate::error::Errors;
use crate::parser::definition::Literal;
use crate::pattern::Pattern;
/// A named regex fragment that other patterns can pull in with a `(?&name)` reference.
pub struct Subpattern {
/// Identifier the subpattern was declared under; its span is used when reporting
/// invalid or duplicate names.
name: Ident,
/// Regex source for the subpattern, wrapped in a `(?u:...)` / `(?-u:...)` group.
/// After registration, references to earlier subpatterns have been substituted in.
pattern: String,
}
impl Subpattern {
    /// Creates a subpattern called `name` from the literal `pattern_src`.
    ///
    /// The escaped literal is wrapped in a non-capturing flag group so that its
    /// Unicode setting travels with it: `(?u:...)` when the literal reports
    /// Unicode mode, `(?-u:...)` otherwise.
    fn new(name: Ident, pattern_src: &Literal) -> Self {
        // NOTE(review): the meaning of the `false` argument is defined by
        // `Literal::escape`, which is not visible here.
        let escaped = pattern_src.escape(false);
        let unicode_toggle = match pattern_src.unicode() {
            true => "u",
            false => "-u",
        };

        Self {
            name,
            pattern: format!("(?{unicode_toggle}:{escaped})"),
        }
    }
}
/// Registry of every successfully defined [`Subpattern`], used to expand
/// `(?&name)` references inside other patterns.
#[derive(Default)]
pub struct Subpatterns {
/// Subpatterns keyed by the string form of their declared name.
map: HashMap<String, Subpattern>,
}
/// Matches a string made up *entirely* of ASCII letters, digits and underscores.
///
/// The `^`/`$` anchors matter: `is_match` performs an unanchored search, so without
/// them any name merely *containing* one such character (e.g. `r#type`) would be
/// accepted even though a `(?&name)` reference could never spell it.
static SUBPATTERN_IDENT: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[0-9a-zA-Z_]+$").unwrap());
/// Matches a subpattern reference of the form `(?&name)`, where `name` is a
/// non-empty run of ASCII letters, digits and underscores.
static SUBPATTERN_GROUP: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\(\?\&[0-9a-zA-Z_]+\)").unwrap());
impl Subpatterns {
pub fn new(subpatterns: &Vec<(Ident, Literal)>, utf8_mode: bool, errors: &mut Errors) -> Self {
let mut build = Self {
map: HashMap::new(),
};
for (name, pattern) in subpatterns {
let name_string = name.to_string();
let mut subpattern = Subpattern::new(name.clone(), pattern);
if !SUBPATTERN_IDENT.is_match(&name_string) {
errors.err(
format!("Invalid subpattern name: `{name}`"),
subpattern.name.span(),
);
continue;
}
if let Some(subst_pattern) =
build.subst_subpatterns(&subpattern.pattern, pattern.span(), errors)
{
subpattern.pattern = subst_pattern
} else {
continue;
};
match Pattern::compile(
false,
&subpattern.pattern,
pattern.token().to_string(),
true,
false,
) {
Err(msg) => {
errors.err(msg, pattern.span());
continue;
}
Ok(pat) => {
let utf8_pat = pat.hir().properties().is_utf8();
if utf8_mode && !utf8_pat {
errors.err(format!(concat!(
"UTF-8 mode is requested, but the subpattern {} = {} can match invalid utf8.\n",
"You can disable UTF-8 mode with #[logos(utf8 = false)]"
), name, pat.source()), pattern.span());
continue;
}
}
}
if let Some(existing) = build.map.insert(name_string, subpattern) {
errors
.err(format!("Subpattern `{name}` already exists"), name.span())
.err("Previously assigned here", existing.name.span());
continue;
}
}
build
}
pub fn subst_subpatterns(
&self,
pattern: &str,
span: Span,
errors: &mut Errors,
) -> Option<String> {
let mut fragments = Vec::new();
let mut was_error = false;
let mut current_pos = 0;
for group in SUBPATTERN_GROUP.find_iter(pattern) {
if group.start() > current_pos {
fragments.push(&pattern[current_pos..group.start()]);
}
let name = &pattern[group.start() + 3..group.end() - 1]; if let Some(subpattern) = self.map.get(name) {
fragments.push(&subpattern.pattern);
} else {
was_error = true;
errors.err(format!("Subpattern `{name}` not found"), span);
}
current_pos = group.end();
}
if current_pos < pattern.len() {
fragments.push(&pattern[current_pos..]);
}
if was_error {
None
} else {
Some(fragments.join(""))
}
}
}