use std::{error::Error, fmt, str::CharIndices};
/// Error produced by [`validate_replace`] when a replacement string contains
/// an unbraced capture reference that starts with digits and then continues
/// with other identifier characters (for example `$1bad`).
///
/// Its `Display` implementation renders a multi-line diagnostic that points
/// at the offending identifier.
#[derive(Debug)]
pub struct InvalidReplaceCapture {
    /// Owned copy of the full replacement string that failed validation.
    original_replace: String,
    /// Byte range of the offending identifier (the text after the `$`)
    /// inside `original_replace`.
    invalid_ident: Span,
    /// Byte offset, within the identifier, of its first non-digit character;
    /// the leading characters are ASCII digits, so this is also their count.
    num_leading_digits: usize,
}

impl Error for InvalidReplaceCapture {}
impl fmt::Display for InvalidReplaceCapture {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
#[derive(Clone, Copy)]
enum SpecialChar {
Newline,
CarriageReturn,
Tab,
}
impl SpecialChar {
fn new(c: char) -> Option<Self> {
match c {
'\n' => Some(Self::Newline),
'\r' => Some(Self::CarriageReturn),
'\t' => Some(Self::Tab),
_ => None,
}
}
fn render(self) -> char {
match self {
Self::Newline => '␊',
Self::CarriageReturn => '␍',
Self::Tab => '␉',
}
}
}
let Self { original_replace, invalid_ident, num_leading_digits } = self;
let mut formatted = String::new();
let mut arrows_start = Span::start_at(0);
for (byte_index, c) in original_replace.char_indices() {
let (prefix, suffix, text) = match SpecialChar::new(c) {
Some(c) => {
(Some("" ), Some("" ), c.render())
}
None => {
let (prefix, suffix) = if byte_index == invalid_ident.start {
(Some("" ), None)
} else if byte_index == invalid_ident.end.checked_sub(1).unwrap() {
(None, Some("" ))
} else {
(None, None)
};
(prefix, suffix, c)
}
};
if let Some(prefix) = prefix {
formatted.push_str(prefix);
}
formatted.push(text);
if let Some(suffix) = suffix {
formatted.push_str(suffix);
}
if byte_index < invalid_ident.start {
arrows_start.start += 1;
}
}
let arrows_span = arrows_start.end_offset(invalid_ident.len());
let mut arrows = " ".repeat(arrows_span.start);
arrows.push_str(&"^".repeat(arrows_span.len()));
let ident = invalid_ident.slice(original_replace);
let (number, the_rest) = ident.split_at(*num_leading_digits);
let disambiguous = format!("${{{number}}}{the_rest}");
let error_message = format!("The numbered capture group `${number}` in the replacement text is ambiguous.");
let hint_message = format!("{}: Use curly braces to disambiguate it `{}`.", "hint", disambiguous);
writeln!(f, "{}", error_message)?;
writeln!(f, "{}", hint_message)?;
writeln!(f, "{}", formatted)?;
write!(f, "{}", arrows)
}
}
/// Checks a replacement string for ambiguous numbered capture references.
///
/// Every capture reference yielded by `ReplaceCaptureIter` is inspected. A
/// reference whose name starts with an ASCII digit must consist of ASCII
/// digits only. Braced references are never rejected, because their name
/// includes the opening `{` and therefore does not start with a digit.
///
/// # Errors
///
/// Returns [`InvalidReplaceCapture`] for the first reference that starts with
/// a digit and then contains a non-digit character (e.g. `$1bad`).
pub fn validate_replace(s: &str) -> Result<(), InvalidReplaceCapture> {
    let first_ambiguous = ReplaceCaptureIter::new(s).find_map(|capture| {
        let mut rest = capture.name.char_indices();
        // The capture finders never produce an empty name, so a first
        // character always exists.
        let (_, leading) = rest.next().unwrap();
        if !leading.is_ascii_digit() {
            return None;
        }
        // Byte offset of the first non-digit == length of the digit prefix.
        rest.find(|&(_, c)| !c.is_ascii_digit())
            .map(|(digits_end, _)| (capture.span, digits_end))
    });

    match first_ambiguous {
        Some((invalid_ident, num_leading_digits)) => Err(InvalidReplaceCapture {
            original_replace: s.to_owned(),
            invalid_ident,
            num_leading_digits,
        }),
        None => Ok(()),
    }
}
/// A non-empty, half-open byte range `start..end`.
///
/// [`Span::new`] enforces `start < end`, so a span built through it always
/// covers at least one byte.
#[derive(Debug, Clone, Copy)]
struct Span {
    start: usize,
    end: usize,
}

impl Span {
    /// Begins a span at `start`; the end is supplied later through
    /// [`SpanOpen::end_at`] or [`SpanOpen::end_offset`].
    fn start_at(start: usize) -> SpanOpen {
        SpanOpen { start }
    }

    /// Builds the span `start..end`.
    ///
    /// # Panics
    ///
    /// Panics unless `start < end`.
    fn new(start: usize, end: usize) -> Self {
        assert!(start < end);
        Span { start, end }
    }

    /// Returns the part of `s` covered by this span.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds for `s` or does not fall on
    /// character boundaries (standard `str` indexing rules).
    fn slice(self, s: &str) -> &str {
        let Span { start, end } = self;
        &s[start..end]
    }

    /// Number of bytes covered by the span.
    fn len(self) -> usize {
        let Span { start, end } = self;
        end - start
    }
}

/// A span whose start is known but whose end has not been chosen yet.
#[derive(Clone, Copy)]
struct SpanOpen {
    start: usize,
}

impl SpanOpen {
    /// Closes the span at the absolute position `end`.
    ///
    /// # Panics
    ///
    /// Panics unless `end` is greater than the stored start.
    fn end_at(self, end: usize) -> Span {
        Span::new(self.start, end)
    }

    /// Closes the span `offset` bytes after its start.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is zero.
    fn end_offset(self, offset: usize) -> Span {
        assert_ne!(offset, 0);
        self.end_at(self.start + offset)
    }
}
/// A capture-group reference located inside a replacement string.
#[derive(Debug)]
struct Capture<'rep> {
    /// Text of the reference as it appears after the `$`. For the braced
    /// form produced by `find_cap_ref_braced` this includes the surrounding
    /// `{` and `}`.
    name: &'rep str,
    /// Byte range attached to `name` by whoever built the capture.
    span: Span,
}

impl<'rep> Capture<'rep> {
    /// Bundles a reference's text with its span.
    fn new(name: &'rep str, span: Span) -> Self {
        Capture { name, span }
    }
}
/// Iterator state for walking a replacement string: wraps the string's
/// `CharIndices`, which keeps track of the current byte offset.
struct ReplaceCaptureIter<'rep>(CharIndices<'rep>);

impl<'rep> ReplaceCaptureIter<'rep> {
    /// Starts at the beginning of `s`.
    fn new(s: &'rep str) -> Self {
        ReplaceCaptureIter(s.char_indices())
    }
}
impl<'rep> Iterator for ReplaceCaptureIter<'rep> {
    type Item = Capture<'rep>;

    /// Yields the next capture reference in the replacement string, or
    /// `None` once the text is exhausted.
    ///
    /// `$$` is skipped as a unit, and a `$` that is not followed by a
    /// recognisable reference is ignored.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Consume characters up to and including the next `$`, keeping
            // its byte offset. Running out of input ends the iteration.
            let dollar_at = loop {
                let (index, c) = self.0.next()?;
                if c == '$' {
                    break index;
                }
            };

            // Everything after the `$`; a trailing `$` has nothing to refer to.
            let tail = self.0.as_str().as_bytes();
            let ident_start = Span::start_at(dollar_at + 1);
            let found = match *tail.first()? {
                b'$' => {
                    // `$$`: consume the second dollar and keep scanning.
                    self.0.next().unwrap();
                    continue;
                }
                b'{' => find_cap_ref_braced(tail, ident_start),
                _ => find_cap_ref(tail, ident_start),
            };
            let Some(capture) = found else { continue };

            // Step the underlying iterator past the reference one character
            // at a time so that it keeps reporting offsets relative to the
            // start of the whole replacement string.
            for _ in capture.name.chars() {
                self.0.next().unwrap();
            }
            return Some(capture);
        }
    }
}
fn find_cap_ref(rep: &[u8], open_span: SpanOpen) -> Option<Capture<'_>> {
if rep.is_empty() {
return None;
}
let mut cap_end = 0;
while rep.get(cap_end).copied().is_some_and(is_valid_cap_letter) {
cap_end += 1;
}
if cap_end == 0 {
return None;
}
let name = core::str::from_utf8(&rep[..cap_end]).expect("valid UTF-8 capture name");
Some(Capture::new(name, open_span.end_offset(name.len())))
}
/// Parses a braced capture reference (`{...}`) from the start of `rep`, the
/// bytes that follow a `$`.
///
/// The returned name runs from the opening `{` through the first `}`, both
/// included, and the span starting at `open_span` has that same length.
/// Returns `None` when there is no closing brace or when the bytes up to it
/// are not valid UTF-8.
///
/// # Panics
///
/// Panics if `rep` is empty or does not begin with `{`.
fn find_cap_ref_braced(rep: &[u8], open_span: SpanOpen) -> Option<Capture<'_>> {
    assert_eq!(b'{', rep[0]);

    // The first byte is `{`, so searching from index 0 cannot match it and
    // finds the same `}` as a scan that starts at index 1.
    let close = rep.iter().position(|&b| b == b'}')?;

    let name = core::str::from_utf8(&rep[..=close]).ok()?;
    Some(Capture::new(name, open_span.end_offset(name.len())))
}
/// Returns `true` for bytes allowed in an unbraced capture name: ASCII
/// letters, ASCII digits and `_`.
fn is_valid_cap_letter(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}