use std::cell::RefCell;
use crate::buffer::{Buffer, BufferId};
use crate::emacs_core::casefiddle::apply_replace_match_case;
use crate::emacs_core::regex_emacs::{
self, BufferSyntaxLookup, CompiledPattern, DefaultSyntaxLookup, MatchRegisters, SyntaxLookup,
};
use crate::heap_types::LispString;
/// Message text reporting that a `replace-match` subexpression does not exist.
pub(crate) const REPLACE_MATCH_SUBEXP_MISSING: &str = "replace-match subexpression does not exist";
/// Maximum number of entries kept in each compiled-pattern cache in this file
/// (both caches are truncated to this length after an insert).
const SEARCH_PATTERN_CACHE_SIZE: usize = 20;
/// Builds string-relative [`MatchData`] from regex registers.
///
/// A register whose start and end are both non-negative becomes
/// `Some((start + offset, end + offset))`; every other register becomes `None`.
/// The result records no searched string or buffer and is flagged as not
/// holding buffer byte positions.
fn match_data_from_registers(regs: &MatchRegisters, offset: usize) -> MatchData {
    let groups = (0..regs.num_regs())
        .map(|idx| {
            let (lo, hi) = (regs.start[idx], regs.end[idx]);
            (lo >= 0 && hi >= 0).then(|| (lo as usize + offset, hi as usize + offset))
        })
        .collect();
    MatchData {
        groups,
        searched_string: None,
        searched_buffer: None,
        buffer_positions_are_bytes: false,
    }
}
/// Returns `base_emacs_byte` plus the logical byte offset that
/// `storage_byte_to_logical_byte` reports for `storage_pos` within `text`.
fn storage_rel_to_emacs_byte(text: &str, base_emacs_byte: usize, storage_pos: usize) -> usize {
    let logical_offset =
        crate::emacs_core::string_escape::storage_byte_to_logical_byte(text, storage_pos);
    base_emacs_byte + logical_offset
}
/// Builds buffer-relative [`MatchData`] from regex registers.
///
/// A register whose start and end are both non-negative becomes
/// `Some((base_emacs_byte + start, base_emacs_byte + end))`; every other
/// register becomes `None`. The result is flagged as holding buffer byte
/// positions; the caller is expected to fill in `searched_buffer`.
fn buffer_match_data_from_registers(regs: &MatchRegisters, base_emacs_byte: usize) -> MatchData {
    let groups = (0..regs.num_regs())
        .map(|idx| {
            let (lo, hi) = (regs.start[idx], regs.end[idx]);
            (lo >= 0 && hi >= 0)
                .then(|| (base_emacs_byte + lo as usize, base_emacs_byte + hi as usize))
        })
        .collect();
    MatchData {
        groups,
        searched_string: None,
        searched_buffer: None,
        buffer_positions_are_bytes: true,
    }
}
/// A search pattern after compilation, as stored in the search-pattern cache.
#[derive(Clone)]
enum CompiledSearchPattern {
/// Pattern compiled by the `regex_emacs` engine.
Emacs(CompiledPattern),
/// Pattern that contained no regex constructs; holds the unescaped literal text
/// and is searched without the regex engine.
Literal(String),
}
/// Result of iterating a pattern over a string: every match found plus the
/// pattern's group count.
pub(crate) struct IteratedStringMatches {
/// Number of groups of the compiled pattern, counting the whole match
/// (1 for a literal pattern).
pub capture_count: usize,
/// One entry per match; each entry lists the groups of that match, with
/// `None` for groups that recorded no position.
pub matches: Vec<Vec<Option<(usize, usize)>>>,
}
thread_local! {
// Most-recently-used-first cache of compiled `&str` patterns.
// Entry layout: (posix, case_fold, pattern text, compiled pattern).
static SEARCH_PATTERN_CACHE: RefCell<Vec<(bool, bool, String, CompiledSearchPattern)>> =
const { RefCell::new(Vec::new()) };
// Most-recently-used-first cache of compiled `LispString` patterns.
// Entry layout: (posix, case_fold, pattern multibyte flag, pattern bytes, compiled pattern).
static LISP_REGEX_PATTERN_CACHE: RefCell<Vec<(bool, bool, bool, Vec<u8>, CompiledPattern)>> =
const { RefCell::new(Vec::new()) };
}
/// Positions recorded by the most recent successful search or match.
#[derive(Clone, Debug)]
pub struct MatchData {
/// `(start, end)` per group, `None` for a group that recorded no position.
/// The search functions in this file read index 0 as the whole match.
pub groups: Vec<Option<(usize, usize)>>,
/// The string that was searched, when the match came from a string.
pub searched_string: Option<SearchedString>,
/// The buffer that was searched, when the match came from a buffer.
pub searched_buffer: Option<BufferId>,
/// Set to `true` by the buffer searches in this file, which store byte
/// positions in `groups`.
pub buffer_positions_are_bytes: bool,
}
/// The string a match was made against, kept so later operations can refer to it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SearchedString {
/// A Lisp value; it yields a string only if `Value::as_lisp_string` succeeds.
Heap(super::value::Value),
/// A string owned directly by the match data.
Owned(LispString),
}
impl SearchedString {
    /// Borrows the underlying [`LispString`], or `None` when a heap value
    /// does not provide one.
    pub(crate) fn as_lisp_string(&self) -> Option<&LispString> {
        match self {
            Self::Owned(text) => Some(text),
            Self::Heap(val) => val.as_lisp_string(),
        }
    }

    /// Converts a byte position in the searched string to a character position.
    ///
    /// Returns 0 when no string is available. For a string that is not
    /// multibyte the position is clamped to the byte length.
    fn byte_to_char_pos(&self, byte_pos: usize) -> usize {
        match self.as_lisp_string() {
            None => 0,
            Some(s) if s.is_multibyte() => {
                crate::emacs_core::emacs_char::byte_to_char_pos(s.as_bytes(), byte_pos)
            }
            Some(s) => byte_pos.min(s.byte_len()),
        }
    }

    /// Returns the searched text as an owned `String`.
    ///
    /// Yields an empty string when no string is available, and a lossy UTF-8
    /// conversion of the bytes when `as_utf8_str` yields nothing.
    pub(crate) fn to_owned(&self) -> String {
        let Some(s) = self.as_lisp_string() else {
            return String::new();
        };
        match s.as_utf8_str() {
            Some(utf8) => utf8.to_owned(),
            None => String::from_utf8_lossy(s.as_bytes()).into_owned(),
        }
    }
}
/// Converts a character position in `s` to a byte position.
///
/// For a string that is not multibyte the position is clamped to the byte
/// length. For a multibyte string, positions at or past `s.schars()` map to
/// the byte length; earlier positions go through `char_to_byte_pos`.
pub fn char_pos_to_byte_lisp_string(s: &crate::heap_types::LispString, char_pos: usize) -> usize {
    if s.is_multibyte() {
        if char_pos < s.schars() {
            crate::emacs_core::emacs_char::char_to_byte_pos(s.as_bytes(), char_pos)
        } else {
            s.byte_len()
        }
    } else {
        char_pos.min(s.byte_len())
    }
}
impl MatchData {
    /// True when the match refers to a buffer and its positions are byte positions.
    pub(crate) fn uses_buffer_byte_positions(&self) -> bool {
        self.buffer_positions_are_bytes && self.searched_buffer.is_some()
    }

    /// Returns an owned copy of the searched string's text, if a string was searched.
    pub(crate) fn searched_string_text(&self) -> Option<String> {
        let searched = self.searched_string.as_ref()?;
        Some(SearchedString::to_owned(searched))
    }
}
/// Rewrites an Emacs-syntax regular expression into the syntax in which
/// groups, alternation and intervals are written without backslashes.
///
/// Summary of the rewriting performed:
/// - bare `(`, `)`, `{`, `}`, `|` become escaped literals, while `\(`, `\)`,
///   `\{…\}`, `\|` become the unescaped operators;
/// - `\(?:` and `\(?N:` open a non-capturing / plain group;
/// - `` \` `` and `\=` become `\A`, `\'` becomes `\z`, `\<`, `\>`, `\_<`, `\_>`
///   become `\b`;
/// - `\sX` / `\SX` syntax classes are approximated with character classes and
///   `\cX` with "any non-ASCII character";
/// - inside brackets a backslash is literal, reversed ranges such as `z-a`
///   are dropped, and a bracket left with no content becomes a class that
///   matches nothing (or everything, when negated);
/// - POSIX classes such as `[:alpha:]` inside a bracket are copied verbatim,
///   and the `]` that ends the class does not end the enclosing bracket.
///
/// Non-ASCII characters are always copied through unchanged.
pub fn translate_emacs_regex(pattern: &str) -> String {
    /// Decodes the character starting at `byte_idx`, returning it with its UTF-8 length.
    fn next_char_at(s: &str, byte_idx: usize) -> Option<(char, usize)> {
        s.get(byte_idx..)
            .and_then(|tail| tail.chars().next().map(|ch| (ch, ch.len_utf8())))
    }

    /// Appends `ch` to a character class, escaping the two characters that
    /// are special inside a class in the output syntax.
    fn push_rust_class_char(out: &mut String, ch: char) {
        match ch {
            '\\' => out.push_str("\\\\"),
            '[' => out.push_str("\\["),
            _ => out.push(ch),
        }
    }

    /// Given `open` pointing at the `[` of a `[:` sequence, returns the index
    /// just past the closing `:]` when the sequence is a well-formed
    /// `[:name:]` class (non-empty ASCII-alphabetic name).
    fn posix_class_end(bytes: &[u8], open: usize) -> Option<usize> {
        let name_start = open + 2;
        let mut name_end = name_start;
        while name_end < bytes.len() && bytes[name_end].is_ascii_alphabetic() {
            name_end += 1;
        }
        let closed = name_end > name_start
            && bytes.get(name_end) == Some(&b':')
            && bytes.get(name_end + 1) == Some(&b']');
        closed.then_some(name_end + 2)
    }

    let mut out = String::with_capacity(pattern.len() + 8);
    let bytes = pattern.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    let mut in_bracket = false;
    let mut bracket_negated = false;
    // Length of `out` right after the bracket opener; used to detect a
    // bracket whose whole content was dropped.
    let mut bracket_content_start: usize = 0;
    while i < len {
        let (ch, ch_len) = next_char_at(pattern, i).expect("byte index must be char boundary");
        if !ch.is_ascii() {
            out.push(ch);
            i += ch_len;
            continue;
        }
        if in_bracket {
            if ch == ']' {
                in_bracket = false;
                if out.len() == bracket_content_start {
                    // Nothing survived inside the bracket: replace the opener
                    // with a class that matches nothing (or anything if negated).
                    let open_len = if bracket_negated { 2 } else { 1 };
                    out.truncate(bracket_content_start - open_len);
                    if bracket_negated {
                        out.push_str("[\\s\\S]");
                    } else {
                        out.push_str("[^\\s\\S]");
                    }
                } else {
                    out.push(']');
                }
                i += 1;
                continue;
            }
            if ch == '\\' {
                // Backslash is an ordinary character inside a bracket.
                if i + 1 < len && bytes[i + 1] == b']' {
                    push_rust_class_char(&mut out, ch);
                    i += 1;
                    continue;
                }
                if i + 2 < len && bytes[i + 1] == b'-' && bytes[i + 2] != b']' {
                    let (end_ch, end_len) =
                        next_char_at(pattern, i + 2).expect("byte index must be char boundary");
                    if ch > end_ch {
                        // Reversed range: drop it entirely.
                        i += 1 + 1 + end_len;
                        continue;
                    }
                    push_rust_class_char(&mut out, ch);
                    out.push('-');
                    push_rust_class_char(&mut out, end_ch);
                    i += 1 + 1 + end_len;
                } else {
                    push_rust_class_char(&mut out, ch);
                    i += 1;
                }
                continue;
            }
            if ch == '[' {
                if i + 1 < len && bytes[i + 1] == b':' {
                    // Copy a complete `[:name:]` class in one step so its
                    // closing `]` is not mistaken for the end of the bracket.
                    if let Some(class_end) = posix_class_end(bytes, i) {
                        out.push_str(&pattern[i..class_end]);
                        i = class_end;
                        continue;
                    }
                    out.push('[');
                } else {
                    out.push_str("\\[");
                }
                i += 1;
                continue;
            }
            if i + 2 < len && bytes[i + 1] == b'-' && bytes[i + 2] != b']' {
                let (end_ch, end_len) =
                    next_char_at(pattern, i + 2).expect("byte index must be char boundary");
                if ch > end_ch {
                    // Reversed range: drop it entirely.
                    i += 1 + 1 + end_len;
                    continue;
                }
            }
            out.push(ch);
            i += ch_len;
            continue;
        }
        match ch {
            '[' => {
                in_bracket = true;
                bracket_negated = false;
                out.push('[');
                i += 1;
                if i < len && bytes[i] == b'^' {
                    out.push('^');
                    bracket_negated = true;
                    i += 1;
                }
                bracket_content_start = out.len();
                // A `]` directly after the opener is a literal member.
                if i < len && bytes[i] == b']' {
                    out.push_str("\\]");
                    i += 1;
                }
            }
            '(' => {
                out.push_str("\\(");
                i += 1;
            }
            ')' => {
                out.push_str("\\)");
                i += 1;
            }
            '{' => {
                out.push_str("\\{");
                i += 1;
            }
            '}' => {
                out.push_str("\\}");
                i += 1;
            }
            '|' => {
                out.push_str("\\|");
                i += 1;
            }
            '\\' if i + 1 < len => {
                let (next, next_len) =
                    next_char_at(pattern, i + 1).expect("byte index must be char boundary");
                match next {
                    '(' => {
                        let group_idx = i + 1 + next_len;
                        if group_idx < len && bytes[group_idx] == b'?' {
                            if group_idx + 1 < len && bytes[group_idx + 1] == b':' {
                                out.push_str("(?:");
                                i = group_idx + 2;
                                continue;
                            }
                            // `\(?N:` explicitly numbered group: emitted as a plain group.
                            let digits_start = group_idx + 1;
                            let mut digits_end = digits_start;
                            while digits_end < len && bytes[digits_end].is_ascii_digit() {
                                digits_end += 1;
                            }
                            if digits_end > digits_start
                                && digits_end < len
                                && bytes[digits_end] == b':'
                            {
                                out.push('(');
                                i = digits_end + 1;
                                continue;
                            }
                        }
                        out.push('(');
                        i += 1 + next_len;
                    }
                    ')' => {
                        out.push(')');
                        i += 1 + next_len;
                    }
                    '|' => {
                        out.push('|');
                        i += 1 + next_len;
                    }
                    '{' => {
                        // Look for the matching `\}` and copy the interval between.
                        let interval_start = i + 1 + next_len;
                        let mut scan = interval_start;
                        let mut closed_interval = false;
                        while scan < len {
                            if bytes[scan] == b'\\' && scan + 1 < len && bytes[scan + 1] == b'}' {
                                let interval = &pattern[interval_start..scan];
                                out.push('{');
                                if let Some(rest) = interval.strip_prefix(',') {
                                    // `\{,N\}` has an implicit lower bound of 0.
                                    out.push('0');
                                    out.push(',');
                                    out.push_str(rest);
                                } else {
                                    out.push_str(interval);
                                }
                                out.push('}');
                                i = scan + 2;
                                closed_interval = true;
                                break;
                            }
                            scan += 1;
                        }
                        if closed_interval {
                            continue;
                        }
                        out.push('{');
                        i += 1 + next_len;
                    }
                    '}' => {
                        out.push('}');
                        i += 1 + next_len;
                    }
                    '`' => {
                        out.push_str("\\A");
                        i += 1 + next_len;
                    }
                    '\'' => {
                        out.push_str("\\z");
                        i += 1 + next_len;
                    }
                    '<' => {
                        out.push_str("\\b");
                        i += 1 + next_len;
                    }
                    '>' => {
                        out.push_str("\\b");
                        i += 1 + next_len;
                    }
                    '_' => {
                        i += 1 + next_len;
                        if i < len {
                            let (boundary_ch, boundary_len) =
                                next_char_at(pattern, i).expect("byte index must be char boundary");
                            match boundary_ch {
                                '<' | '>' => {
                                    i += boundary_len;
                                    out.push_str("\\b");
                                }
                                _ => {
                                    out.push('_');
                                }
                            }
                        } else {
                            out.push('_');
                        }
                    }
                    '1'..='9' => {
                        out.push('\\');
                        out.push(next);
                        i += 1 + next_len;
                    }
                    's' => {
                        i += 1 + next_len;
                        if i < len {
                            let (class_ch, class_len) =
                                next_char_at(pattern, i).expect("byte index must be char boundary");
                            match class_ch {
                                '-' | ' ' => {
                                    i += class_len;
                                    out.push_str("\\s");
                                }
                                'w' => {
                                    i += class_len;
                                    out.push_str("\\w");
                                }
                                '_' => {
                                    i += class_len;
                                    out.push_str("[\\w_]");
                                }
                                '.' => {
                                    i += class_len;
                                    out.push_str("[[:punct:]]");
                                }
                                '(' => {
                                    i += class_len;
                                    out.push_str("[\\[\\(\\{]");
                                }
                                ')' => {
                                    i += class_len;
                                    out.push_str("[\\]\\)\\}]");
                                }
                                '"' => {
                                    i += class_len;
                                    out.push_str("[\"']");
                                }
                                '\'' | '<' | '>' | '!' | '|' | '/' => {
                                    i += class_len;
                                    out.push_str("\\s");
                                }
                                _ => {
                                    // Unknown class character: it is not consumed.
                                    out.push_str("\\s");
                                }
                            }
                        } else {
                            out.push_str("\\s");
                        }
                    }
                    'S' => {
                        i += 1 + next_len;
                        if i < len {
                            let (class_ch, class_len) =
                                next_char_at(pattern, i).expect("byte index must be char boundary");
                            match class_ch {
                                '-' | ' ' => {
                                    i += class_len;
                                    out.push_str("\\S");
                                }
                                'w' => {
                                    i += class_len;
                                    out.push_str("\\W");
                                }
                                '_' => {
                                    i += class_len;
                                    out.push_str("[^\\w_]");
                                }
                                '.' => {
                                    i += class_len;
                                    out.push_str("[^[:punct:]]");
                                }
                                '(' => {
                                    i += class_len;
                                    out.push_str("[^\\[\\(\\{]");
                                }
                                ')' => {
                                    i += class_len;
                                    out.push_str("[^\\]\\)\\}]");
                                }
                                '"' => {
                                    i += class_len;
                                    out.push_str("[^\"']");
                                }
                                '\'' | '<' | '>' | '!' | '|' | '/' => {
                                    i += class_len;
                                    out.push_str("\\S");
                                }
                                _ => {
                                    // Unknown class character: it is not consumed.
                                    out.push_str("\\S");
                                }
                            }
                        } else {
                            out.push_str("\\S");
                        }
                    }
                    'c' => {
                        // `\cX`: the category character is consumed and the
                        // construct is approximated as "any non-ASCII character".
                        i += 1 + next_len;
                        if i < len {
                            let (_, class_len) =
                                next_char_at(pattern, i).expect("byte index must be char boundary");
                            i += class_len;
                        }
                        out.push_str("[^\\x00-\\x7F]");
                    }
                    '=' => {
                        out.push_str("\\A");
                        i += 1 + next_len;
                    }
                    'w' | 'W' | 'b' | 'B' | 'd' | 'D' | 'n' | 't' | 'r' => {
                        // These escapes are passed through unchanged.
                        out.push('\\');
                        out.push(next);
                        i += 1 + next_len;
                    }
                    '\\' => {
                        out.push_str("\\\\");
                        i += 1 + next_len;
                    }
                    _ => {
                        // Any other escaped character: keep the backslash for
                        // ASCII, drop it for non-ASCII.
                        if next.is_ascii() {
                            out.push('\\');
                        }
                        out.push(next);
                        i += 1 + next_len;
                    }
                }
            }
            '\\' => {
                // Trailing backslash at the very end of the pattern.
                out.push('\\');
                i += 1;
            }
            _ => {
                out.push(ch);
                i += 1;
            }
        }
    }
    out
}
/// Reports whether `pattern` contains no regex constructs, i.e. whether it
/// can be searched for as plain text once backslash escapes are removed.
///
/// Returns `false` for any unescaped `. * + ? [ ^ $`, for a trailing
/// backslash, and for a backslash followed by one of the special escape
/// characters listed below.
fn trivial_regexp_p(pattern: &str) -> bool {
    let mut rest = pattern.chars();
    loop {
        match rest.next() {
            None => return true,
            Some('.' | '*' | '+' | '?' | '[' | '^' | '$') => return false,
            Some('\\') => {
                let special = match rest.next() {
                    // A dangling backslash is not a trivial pattern.
                    None => true,
                    Some(escaped) => matches!(
                        escaped,
                        '|' | '('
                            | ')'
                            | '`'
                            | '\''
                            | 'b'
                            | 'B'
                            | '<'
                            | '>'
                            | 'w'
                            | 'W'
                            | 's'
                            | 'S'
                            | '='
                            | '{'
                            | '}'
                            | '_'
                            | 'c'
                            | 'C'
                            | '1'..='9'
                            | 'n'
                            | 't'
                            | 'r'
                    ),
                };
                if special {
                    return false;
                }
            }
            Some(_) => {}
        }
    }
}
/// Returns the literal text denoted by `pattern` when `trivial_regexp_p`
/// accepts it, with each backslash escape replaced by the escaped character.
/// Returns `None` for any non-trivial pattern.
fn literal_from_trivial_regexp(pattern: &str) -> Option<String> {
    if !trivial_regexp_p(pattern) {
        return None;
    }
    let mut literal = String::with_capacity(pattern.len());
    let mut rest = pattern.chars();
    while let Some(ch) = rest.next() {
        let unescaped = match ch {
            '\\' => rest.next()?,
            other => other,
        };
        literal.push(unescaped);
    }
    Some(literal)
}
/// Compiles `pattern` with POSIX matching disabled; see
/// `compile_search_pattern_with_posix`.
fn compile_search_pattern(pattern: &str, case_fold: bool) -> Result<CompiledSearchPattern, String> {
    let posix = false;
    compile_search_pattern_with_posix(pattern, case_fold, posix)
}
/// Compiles `pattern`, consulting the thread-local search-pattern cache first.
///
/// Cache entries are keyed on `(posix, case_fold, pattern)` and kept in
/// most-recently-used-first order. A trivial pattern is stored as a
/// [`CompiledSearchPattern::Literal`] unless case folding is requested for a
/// non-ASCII literal; everything else goes through `regex_emacs::regex_compile`.
///
/// # Errors
/// Returns `"Invalid regexp: …"` when the regex compiler rejects the pattern.
fn compile_search_pattern_with_posix(
    pattern: &str,
    case_fold: bool,
    posix: bool,
) -> Result<CompiledSearchPattern, String> {
    let cached = crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexCompileHit,
        || {
            SEARCH_PATTERN_CACHE.with(|cache| {
                let mut cache = cache.borrow_mut();
                let index = cache.iter().position(
                    |(cached_posix, cached_case_fold, cached_pattern, _)| {
                        *cached_posix == posix
                            && *cached_case_fold == case_fold
                            && cached_pattern == pattern
                    },
                )?;
                // Move the hit to the front in place; only the compiled
                // pattern is cloned, not the whole entry with its key string.
                cache[..=index].rotate_right(1);
                Some(cache[0].3.clone())
            })
        },
    );
    if let Some(cached) = cached {
        return Ok(cached);
    }
    let compiled = crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexCompileMiss,
        || {
            if let Some(literal) = literal_from_trivial_regexp(pattern)
                && (!case_fold || literal.is_ascii())
            {
                Ok(CompiledSearchPattern::Literal(literal))
            } else {
                regex_emacs::regex_compile(pattern, posix, case_fold)
                    .map(CompiledSearchPattern::Emacs)
                    .map_err(|e| format!("Invalid regexp: {}", e.message))
            }
        },
    )?;
    SEARCH_PATTERN_CACHE.with(|cache| {
        let mut cache = cache.borrow_mut();
        cache.insert(0, (posix, case_fold, pattern.to_string(), compiled.clone()));
        // `truncate` is a no-op while the cache is within its size limit.
        cache.truncate(SEARCH_PATTERN_CACHE_SIZE);
    });
    Ok(compiled)
}
/// Compiles a `LispString` pattern, consulting the thread-local Lisp
/// pattern cache first.
///
/// Cache entries are keyed on `(posix, case_fold, pattern multibyte flag,
/// pattern bytes)` and kept in most-recently-used-first order.
/// `target_multibyte` is not part of the key: it is written onto the returned
/// pattern on both the hit and the miss path.
///
/// # Errors
/// Returns `"Invalid regexp: …"` when the regex compiler rejects the pattern.
fn compile_lisp_pattern_with_posix(
    pattern: &LispString,
    case_fold: bool,
    posix: bool,
    target_multibyte: bool,
) -> Result<CompiledPattern, String> {
    let cached = crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexCompileHit,
        || {
            LISP_REGEX_PATTERN_CACHE.with(|cache| {
                let mut cache = cache.borrow_mut();
                let index = cache.iter().position(
                    |(cached_posix, cached_case_fold, cached_multibyte, cached_pattern, _)| {
                        *cached_posix == posix
                            && *cached_case_fold == case_fold
                            && *cached_multibyte == pattern.is_multibyte()
                            && cached_pattern.as_slice() == pattern.as_bytes()
                    },
                )?;
                // Move the hit to the front in place; only the compiled
                // pattern is cloned, not the whole entry with its key bytes.
                cache[..=index].rotate_right(1);
                Some(cache[0].4.clone())
            })
        },
    );
    if let Some(mut cached) = cached {
        cached.target_multibyte = target_multibyte;
        return Ok(cached);
    }
    let mut compiled = crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexCompileMiss,
        || {
            regex_emacs::regex_compile_lisp(pattern, posix, case_fold)
                .map_err(|e| format!("Invalid regexp: {}", e.message))
        },
    )?;
    compiled.target_multibyte = target_multibyte;
    LISP_REGEX_PATTERN_CACHE.with(|cache| {
        let mut cache = cache.borrow_mut();
        cache.insert(
            0,
            (
                posix,
                case_fold,
                pattern.is_multibyte(),
                pattern.as_bytes().to_vec(),
                compiled.clone(),
            ),
        );
        // `truncate` is a no-op while the cache is within its size limit.
        cache.truncate(SEARCH_PATTERN_CACHE_SIZE);
    });
    Ok(compiled)
}
/// Number of groups a compiled pattern reports, counting the whole match.
fn compiled_capture_count(compiled: &CompiledSearchPattern) -> usize {
    // A literal has no sub-groups; a regex has `re_nsub` of them.
    let subgroups = match compiled {
        CompiledSearchPattern::Emacs(cp) => cp.re_nsub,
        CompiledSearchPattern::Literal(_) => 0,
    };
    subgroups + 1
}
/// Finds the first match of `compiled` in `text[start..limit]`.
///
/// All reported positions are byte offsets into `text` shifted by `offset`.
/// A literal pattern yields a single group; a regex pattern yields one group
/// per register. Returns `None` when nothing matches.
///
/// # Panics
/// Panics if `start..limit` is not a valid range of `text` (for the literal
/// branch this includes falling on a non-character boundary).
fn find_forward_match_data_compiled(
    compiled: &CompiledSearchPattern,
    text: &str,
    start: usize,
    limit: usize,
    offset: usize,
    case_fold: bool,
) -> Option<MatchData> {
    match compiled {
        CompiledSearchPattern::Literal(literal) => {
            let (match_start, match_end) = literal_find(&text[start..limit], literal, case_fold)?;
            Some(MatchData {
                groups: vec![Some((
                    offset + start + match_start,
                    offset + start + match_end,
                ))],
                searched_string: None,
                searched_buffer: None,
                buffer_positions_are_bytes: false,
            })
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = DefaultSyntaxLookup;
            let text_bytes = text.as_bytes();
            let range = (limit - start) as isize;
            // The text is cut at `limit` so the engine cannot look past it.
            regex_emacs::re_search(cp, &text_bytes[..limit], start, range, &syn, start)
                .map(|(_pos, regs)| match_data_from_registers(&regs, offset))
        }
    }
}
/// Collects successive matches of `pattern` in `string`, starting the first
/// search at byte offset `start`.
///
/// Returns the group list of every match together with the pattern's group
/// count. A `start` past the end of `string` yields no matches, but the
/// pattern is still compiled first, so an invalid pattern is reported as an
/// error either way.
///
/// NOTE(review): when an empty match lies beyond `search_at`, the next
/// iteration restarts at that same position and may report the same empty
/// match a second time — confirm callers expect this.
pub(crate) fn iterate_string_matches_with_case_fold(
pattern: &str,
string: &str,
start: usize,
case_fold: bool,
) -> Result<IteratedStringMatches, String> {
let compiled = compile_search_pattern(pattern, case_fold)?;
let capture_count = compiled_capture_count(&compiled);
if start > string.len() {
return Ok(IteratedStringMatches {
capture_count,
matches: Vec::new(),
});
}
let mut matches = Vec::new();
let mut search_at = start;
// `<=` lets a pattern match the empty string at the very end.
while search_at <= string.len() {
let Some(md) = find_forward_match_data_compiled(
&compiled,
string,
search_at,
string.len(),
0,
case_fold,
) else {
break;
};
let Some((match_start, match_end)) = md.groups.first().and_then(|group| *group) else {
break;
};
matches.push(md.groups);
// The match ended past the search position: resume right after it.
if match_end > search_at {
search_at = match_end;
continue;
}
// No progress was made; step over one character so the loop cannot
// spin on the same position.
let Some(next_at) = next_search_char_boundary(string, match_end) else {
break;
};
if next_at <= search_at {
break;
}
search_at = next_at;
if match_start == match_end && search_at > string.len() {
break;
}
}
Ok(IteratedStringMatches {
capture_count,
matches,
})
}
/// Converts byte-based string match data into character-based match data.
///
/// Each group's byte positions are mapped through
/// `SearchedString::byte_to_char_pos`; the searched string is stored in the
/// result, which refers to no buffer.
fn string_char_match_data(searched_string: SearchedString, byte_md: MatchData) -> MatchData {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexMatchDataChars,
        || {
            let mut char_groups = Vec::with_capacity(byte_md.groups.len());
            for group in &byte_md.groups {
                let converted = group.map(|(byte_start, byte_end)| {
                    let char_start = searched_string.byte_to_char_pos(byte_start);
                    let char_end = searched_string.byte_to_char_pos(byte_end);
                    (char_start, char_end)
                });
                char_groups.push(converted);
            }
            MatchData {
                groups: char_groups,
                searched_string: Some(searched_string),
                searched_buffer: None,
                buffer_positions_are_bytes: false,
            }
        },
    )
}
/// Builds match data holding only the whole-match group `(start, end)`,
/// with no searched string or buffer attached.
fn single_group_match_data(start: usize, end: usize) -> MatchData {
    let whole_match = Some((start, end));
    MatchData {
        groups: vec![whole_match],
        searched_string: None,
        searched_buffer: None,
        buffer_positions_are_bytes: false,
    }
}
/// Returns the byte offset of the first occurrence of `needle` in `haystack`,
/// comparing bytes without regard to ASCII case. An empty needle matches at 0.
fn ascii_case_fold_find(haystack: &str, needle: &str) -> Option<usize> {
    let pat = needle.as_bytes();
    if pat.is_empty() {
        return Some(0);
    }
    // `windows` yields nothing when the needle is longer than the haystack.
    haystack
        .as_bytes()
        .windows(pat.len())
        .position(|candidate| candidate.eq_ignore_ascii_case(pat))
}
/// Returns the byte offset of the last occurrence of `needle` in `haystack`,
/// comparing bytes without regard to ASCII case. An empty needle matches at
/// the end of the haystack.
fn ascii_case_fold_rfind(haystack: &str, needle: &str) -> Option<usize> {
    let pat = needle.as_bytes();
    if pat.is_empty() {
        return Some(haystack.len());
    }
    // `windows` yields nothing when the needle is longer than the haystack.
    haystack
        .as_bytes()
        .windows(pat.len())
        .rposition(|candidate| candidate.eq_ignore_ascii_case(pat))
}
/// Finds the first occurrence of `literal` in `text`, comparing the
/// lowercase expansions of both sides.
///
/// Returns the byte range in `text` spanned by the characters that produced
/// the match. An empty literal matches at `(0, 0)`.
fn unicode_case_fold_literal_find(text: &str, literal: &str) -> Option<(usize, usize)> {
    let needle: Vec<char> = literal.chars().flat_map(char::to_lowercase).collect();
    if needle.is_empty() {
        return Some((0, 0));
    }
    // Sliding window over the most recent folded characters, each tagged with
    // the byte range of the source character it came from.
    let mut recent: std::collections::VecDeque<(char, usize, usize)> =
        std::collections::VecDeque::with_capacity(needle.len());
    for (byte_start, ch) in text.char_indices() {
        let byte_end = byte_start + ch.len_utf8();
        for folded in ch.to_lowercase() {
            if recent.len() == needle.len() {
                recent.pop_front();
            }
            recent.push_back((folded, byte_start, byte_end));
            let is_match = recent.len() == needle.len()
                && recent
                    .iter()
                    .map(|&(c, _, _)| c)
                    .eq(needle.iter().copied());
            if is_match {
                let (_, first_start, _) = *recent.front()?;
                let (_, _, last_end) = *recent.back()?;
                return Some((first_start, last_end));
            }
        }
    }
    None
}
/// Finds the last occurrence of `literal` in `text`, comparing the
/// lowercase expansions of both sides.
///
/// The text is scanned front to back and the most recent match is kept.
/// Returns the byte range in `text` spanned by the characters that produced
/// the match. An empty literal matches at the end of `text`.
fn unicode_case_fold_literal_rfind(text: &str, literal: &str) -> Option<(usize, usize)> {
    let needle: Vec<char> = literal.chars().flat_map(char::to_lowercase).collect();
    if needle.is_empty() {
        return Some((text.len(), text.len()));
    }
    let mut latest = None;
    // Sliding window over the most recent folded characters, each tagged with
    // the byte range of the source character it came from.
    let mut recent: std::collections::VecDeque<(char, usize, usize)> =
        std::collections::VecDeque::with_capacity(needle.len());
    for (byte_start, ch) in text.char_indices() {
        let byte_end = byte_start + ch.len_utf8();
        for folded in ch.to_lowercase() {
            if recent.len() == needle.len() {
                recent.pop_front();
            }
            recent.push_back((folded, byte_start, byte_end));
            let is_match = recent.len() == needle.len()
                && recent
                    .iter()
                    .map(|&(c, _, _)| c)
                    .eq(needle.iter().copied());
            if is_match {
                let (_, first_start, _) = *recent.front()?;
                let (_, _, last_end) = *recent.back()?;
                latest = Some((first_start, last_end));
            }
        }
    }
    latest
}
/// Finds the first occurrence of `literal` in `text` and returns its byte range.
///
/// Without case folding this is an exact substring search. With case folding
/// an ASCII literal is compared ignoring ASCII case, and any other literal is
/// compared through `unicode_case_fold_literal_find`.
fn literal_find(text: &str, literal: &str, case_fold: bool) -> Option<(usize, usize)> {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexLiteralFind,
        || {
            // For the byte-exact strategies the match is as long as the literal.
            let span = |at: usize| (at, at + literal.len());
            match (case_fold, literal.is_ascii()) {
                (false, _) => text.find(literal).map(span),
                (true, true) => ascii_case_fold_find(text, literal).map(span),
                (true, false) => unicode_case_fold_literal_find(text, literal),
            }
        },
    )
}
/// Finds the first occurrence of `literal` in `text` at or after byte offset
/// `start`, returning byte offsets into `text`.
///
/// A string that is not multibyte is searched bytewise (case folding is
/// ASCII-only there). A multibyte string is searched through `literal_find`
/// on its UTF-8 view; if no UTF-8 view is available the result is `None`.
/// A `start` past the end of the string yields `None`.
fn literal_find_lisp_string(
    text: &crate::heap_types::LispString,
    literal: &str,
    start: usize,
    case_fold: bool,
) -> Option<(usize, usize)> {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexLiteralFind,
        || {
            if start > text.byte_len() {
                return None;
            }
            if text.is_multibyte() {
                let utf8 = text.as_utf8_str()?;
                let (rel_start, rel_end) = literal_find(&utf8[start..], literal, case_fold)?;
                return Some((start + rel_start, start + rel_end));
            }
            let haystack = &text.as_bytes()[start..];
            let needle = literal.as_bytes();
            if needle.is_empty() {
                return Some((start, start));
            }
            // `windows` yields nothing when the needle is longer than the haystack.
            let rel_start = haystack.windows(needle.len()).position(|candidate| {
                if case_fold {
                    candidate.eq_ignore_ascii_case(needle)
                } else {
                    candidate == needle
                }
            })?;
            let rel_end = rel_start + needle.len();
            Some((start + rel_start, start + rel_end))
        },
    )
}
/// Finds the last occurrence of `literal` in `text` and returns its byte range.
///
/// Without case folding this is an exact substring search. With case folding
/// an ASCII literal is compared ignoring ASCII case, and any other literal is
/// compared through `unicode_case_fold_literal_rfind`.
fn literal_rfind(text: &str, literal: &str, case_fold: bool) -> Option<(usize, usize)> {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexLiteralFind,
        || {
            // For the byte-exact strategies the match is as long as the literal.
            let span = |at: usize| (at, at + literal.len());
            match (case_fold, literal.is_ascii()) {
                (false, _) => text.rfind(literal).map(span),
                (true, true) => ascii_case_fold_rfind(text, literal).map(span),
                (true, false) => unicode_case_fold_literal_rfind(text, literal),
            }
        },
    )
}
/// True when both slices have the same length and agree byte for byte,
/// ignoring ASCII case.
fn bytes_equal_ascii_case_fold(left: &[u8], right: &[u8]) -> bool {
    left.eq_ignore_ascii_case(right)
}
/// Finds the first occurrence of `literal` in `text` and returns the byte
/// range of the match within `text`.
///
/// Strategy, in order:
/// 1. an empty literal matches at `(0, 0)`;
/// 2. without case folding, or with an ASCII literal, a bytewise scan;
/// 3. if both inputs pass `try_as_utf8`, a case-folding search on the UTF-8 text;
/// 4. otherwise both are converted to storage strings, searched with case
///    folding, and the result is mapped back with `storage_byte_to_logical_byte`.
fn literal_find_emacs_bytes(
    text: &[u8],
    literal: &[u8],
    multibyte: bool,
    case_fold: bool,
) -> Option<(usize, usize)> {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexLiteralFind,
        || {
            if literal.is_empty() {
                return Some((0, 0));
            }
            if !case_fold || literal.is_ascii() {
                let first = text.windows(literal.len()).position(|candidate| {
                    if case_fold {
                        bytes_equal_ascii_case_fold(candidate, literal)
                    } else {
                        candidate == literal
                    }
                })?;
                return Some((first, first + literal.len()));
            }
            let utf8_pair = crate::emacs_core::emacs_char::try_as_utf8(text)
                .zip(crate::emacs_core::emacs_char::try_as_utf8(literal));
            if let Some((text_utf8, literal_utf8)) = utf8_pair {
                return literal_find(text_utf8, literal_utf8, true);
            }
            let text_storage =
                crate::emacs_core::string_escape::emacs_bytes_to_storage_string(text, multibyte);
            let literal_storage =
                crate::emacs_core::string_escape::emacs_bytes_to_storage_string(literal, multibyte);
            let (storage_start, storage_end) = literal_find(&text_storage, &literal_storage, true)?;
            let logical_start = crate::emacs_core::string_escape::storage_byte_to_logical_byte(
                &text_storage,
                storage_start,
            );
            let logical_end = crate::emacs_core::string_escape::storage_byte_to_logical_byte(
                &text_storage,
                storage_end,
            );
            Some((logical_start, logical_end))
        },
    )
}
/// Finds the last occurrence of `literal` in `text` and returns the byte
/// range of the match within `text`.
///
/// Strategy, in order:
/// 1. an empty literal matches at the end of `text`;
/// 2. without case folding, or with an ASCII literal, a bytewise scan from the back;
/// 3. if both inputs pass `try_as_utf8`, a case-folding search on the UTF-8 text;
/// 4. otherwise both are converted to storage strings, searched with case
///    folding, and the result is mapped back with `storage_byte_to_logical_byte`.
fn literal_rfind_emacs_bytes(
    text: &[u8],
    literal: &[u8],
    multibyte: bool,
    case_fold: bool,
) -> Option<(usize, usize)> {
    crate::emacs_core::perf_trace::time_op(
        crate::emacs_core::perf_trace::HotpathOp::RegexLiteralFind,
        || {
            if literal.is_empty() {
                return Some((text.len(), text.len()));
            }
            if !case_fold || literal.is_ascii() {
                let last = text.windows(literal.len()).rposition(|candidate| {
                    if case_fold {
                        bytes_equal_ascii_case_fold(candidate, literal)
                    } else {
                        candidate == literal
                    }
                })?;
                return Some((last, last + literal.len()));
            }
            let utf8_pair = crate::emacs_core::emacs_char::try_as_utf8(text)
                .zip(crate::emacs_core::emacs_char::try_as_utf8(literal));
            if let Some((text_utf8, literal_utf8)) = utf8_pair {
                return literal_rfind(text_utf8, literal_utf8, true);
            }
            let text_storage =
                crate::emacs_core::string_escape::emacs_bytes_to_storage_string(text, multibyte);
            let literal_storage =
                crate::emacs_core::string_escape::emacs_bytes_to_storage_string(literal, multibyte);
            let (storage_start, storage_end) =
                literal_rfind(&text_storage, &literal_storage, true)?;
            let logical_start = crate::emacs_core::string_escape::storage_byte_to_logical_byte(
                &text_storage,
                storage_start,
            );
            let logical_end = crate::emacs_core::string_escape::storage_byte_to_logical_byte(
                &text_storage,
                storage_end,
            );
            Some((logical_start, logical_end))
        },
    )
}
/// Returns the byte offset just past the character that starts at `pos`,
/// or `None` when `pos` is at or beyond the end of `text`.
///
/// # Panics
/// Panics if `pos` lies inside `text` but not on a character boundary.
fn next_search_char_boundary(text: &str, pos: usize) -> Option<usize> {
    if pos >= text.len() {
        return None;
    }
    let current = text[pos..].chars().next()?;
    Some(pos + current.len_utf8())
}
/// Searches forward from point for the literal text `pattern`.
///
/// The search covers the bytes from `buf.pt_byte` up to `bound` (clamped to
/// `buf.zv_byte`). On success point moves to the end of the match,
/// `match_data` is replaced with a single group of buffer byte positions, and
/// the match end is returned.
///
/// # Errors
/// When nothing is found (or point already lies past the limit) the result is
/// `Ok(None)` if `noerror` is set and `Err("Search failed: …")` otherwise.
pub fn search_forward(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let not_found = || -> Result<Option<usize>, String> {
        if noerror {
            Ok(None)
        } else {
            Err(format!("Search failed: \"{}\"", pattern))
        }
    };
    let start = buf.pt_byte;
    let limit = bound.map_or(buf.zv_byte, |b| b.min(buf.zv_byte));
    if start > limit {
        return not_found();
    }
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(start, limit, &mut text);
    let literal = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
        pattern,
        buf.get_multibyte(),
    );
    match literal_find_emacs_bytes(&text, &literal, buf.get_multibyte(), case_fold) {
        Some((rel_start, rel_end)) => {
            let (match_start, match_end) = (start + rel_start, start + rel_end);
            buf.goto_byte(match_end);
            *match_data = Some(MatchData {
                groups: vec![Some((match_start, match_end))],
                searched_string: None,
                searched_buffer: Some(buf.id),
                buffer_positions_are_bytes: true,
            });
            Ok(Some(match_end))
        }
        None => not_found(),
    }
}
/// Searches backward from point for the literal text `pattern`.
///
/// The search covers the bytes from `bound` (raised to at least
/// `buf.begv_byte`) up to `buf.pt_byte`. On success point moves to the start
/// of the match, `match_data` is replaced with a single group of buffer byte
/// positions, and the match start is returned.
///
/// # Errors
/// When nothing is found (or point already lies before the limit) the result
/// is `Ok(None)` if `noerror` is set and `Err("Search failed: …")` otherwise.
pub fn search_backward(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let not_found = || -> Result<Option<usize>, String> {
        if noerror {
            Ok(None)
        } else {
            Err(format!("Search failed: \"{}\"", pattern))
        }
    };
    let end = buf.pt_byte;
    let limit = bound.map_or(buf.begv_byte, |b| b.max(buf.begv_byte));
    if end < limit {
        return not_found();
    }
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(limit, end, &mut text);
    let literal = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
        pattern,
        buf.get_multibyte(),
    );
    match literal_rfind_emacs_bytes(&text, &literal, buf.get_multibyte(), case_fold) {
        Some((rel_start, rel_end)) => {
            let (match_start, match_end) = (limit + rel_start, limit + rel_end);
            buf.goto_byte(match_start);
            *match_data = Some(MatchData {
                groups: vec![Some((match_start, match_end))],
                searched_string: None,
                searched_buffer: Some(buf.id),
                buffer_positions_are_bytes: true,
            });
            Ok(Some(match_start))
        }
        None => not_found(),
    }
}
/// Regex search forward from point with POSIX matching disabled; see
/// `re_search_forward_with_posix` for the full contract.
pub fn re_search_forward(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let posix = false;
    re_search_forward_with_posix(buf, pattern, bound, noerror, case_fold, posix, match_data)
}
/// Searches forward from point for the regular expression `pattern`.
///
/// The search starts at `buf.pt_byte` and may not extend past `bound`
/// (clamped to `buf.zv_byte`). A pattern that compiles to a literal is
/// searched bytewise; otherwise the regex engine runs with the buffer's
/// syntax table over the accessible text cut at the limit. On success point
/// moves to the end of the match, `match_data` receives buffer byte
/// positions, and the match end is returned.
///
/// # Errors
/// Returns the compiler's `"Invalid regexp: …"` error for a bad pattern.
/// When nothing is found (or point already lies past the limit) the result is
/// `Ok(None)` if `noerror` is set and `Err("Search failed: …")` otherwise.
pub fn re_search_forward_with_posix(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let start = buf.pt_byte;
    let limit = bound.unwrap_or(buf.zv_byte).min(buf.zv_byte);
    if start > limit {
        if noerror {
            return Ok(None);
        }
        return Err(format!("Search failed: \"{}\"", pattern));
    }
    // Work on a copy of the accessible region; positions relative to it are
    // suffixed `_rel`.
    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = start - region_start;
    let limit_rel = limit - region_start;
    let md_opt = match compile_search_pattern_with_posix(pattern, case_fold, posix)? {
        CompiledSearchPattern::Literal(literal) => {
            let literal_bytes = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
                &literal,
                buf.get_multibyte(),
            );
            literal_find_emacs_bytes(
                &text[start_rel..limit_rel],
                &literal_bytes,
                buf.get_multibyte(),
                case_fold,
            )
            .map(|(rel_start, rel_end)| MatchData {
                groups: vec![Some((start + rel_start, start + rel_end))],
                searched_string: None,
                searched_buffer: Some(buf.id),
                buffer_positions_are_bytes: true,
            })
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = BufferSyntaxLookup {
                syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
            };
            let range = (limit_rel - start_rel) as isize;
            regex_emacs::re_search(&cp, &text[..limit_rel], start_rel, range, &syn, start_rel).map(
                |(_pos, regs)| {
                    let mut md = buffer_match_data_from_registers(&regs, region_start);
                    md.searched_buffer = Some(buf.id);
                    md
                },
            )
        }
    };
    let Some(md) = md_opt else {
        return if noerror {
            Ok(None)
        } else {
            Err(format!("Search failed: \"{}\"", pattern))
        };
    };
    let (_, match_end) = md
        .groups
        .first()
        .copied()
        .flatten()
        .expect("a successful search always records the whole match as group 0");
    buf.goto_byte(match_end);
    *match_data = Some(md);
    Ok(Some(match_end))
}
/// Regex search backward from point with POSIX matching disabled; see
/// `re_search_backward_with_posix` for the full contract.
pub fn re_search_backward(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let posix = false;
    re_search_backward_with_posix(buf, pattern, bound, noerror, case_fold, posix, match_data)
}
/// Searches backward from point for the regular expression `pattern`.
///
/// Candidate match starts range from `buf.pt_byte` down to `bound` (raised
/// to at least `buf.begv_byte`). A pattern that compiles to a literal is
/// searched bytewise between the limit and point; otherwise the regex engine
/// runs with the buffer's syntax table and a negative range. On success point
/// moves to the start of the match, `match_data` receives buffer byte
/// positions, and the match start is returned.
///
/// NOTE(review): unlike the forward variant, the regex branch hands the whole
/// accessible text to `re_search` instead of cutting it at point, so whether
/// a match may extend past point depends on `re_search` — confirm.
///
/// # Errors
/// Returns the compiler's `"Invalid regexp: …"` error for a bad pattern.
/// When nothing is found (or point already lies before the limit) the result
/// is `Ok(None)` if `noerror` is set and `Err("Search failed: …")` otherwise.
pub fn re_search_backward_with_posix(
    buf: &mut Buffer,
    pattern: &str,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let end = buf.pt_byte;
    let limit = bound.unwrap_or(buf.begv_byte).max(buf.begv_byte);
    if end < limit {
        if noerror {
            return Ok(None);
        }
        return Err(format!("Search failed: \"{}\"", pattern));
    }
    // Work on a copy of the accessible region; positions relative to it are
    // suffixed `_rel`.
    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = end - region_start;
    let limit_rel = limit - region_start;
    let md_opt = match compile_search_pattern_with_posix(pattern, case_fold, posix)? {
        CompiledSearchPattern::Literal(literal) => {
            let literal_bytes = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
                &literal,
                buf.get_multibyte(),
            );
            literal_rfind_emacs_bytes(
                &text[limit_rel..start_rel],
                &literal_bytes,
                buf.get_multibyte(),
                case_fold,
            )
            .map(|(rel_start, rel_end)| MatchData {
                // `region_start + limit_rel` is `limit` by construction.
                groups: vec![Some((limit + rel_start, limit + rel_end))],
                searched_string: None,
                searched_buffer: Some(buf.id),
                buffer_positions_are_bytes: true,
            })
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = BufferSyntaxLookup {
                syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
            };
            // A negative range asks the engine to scan toward the limit.
            let range = -((start_rel - limit_rel) as isize);
            regex_emacs::re_search(&cp, &text, start_rel, range, &syn, start_rel).map(
                |(_pos, regs)| {
                    let mut md = buffer_match_data_from_registers(&regs, region_start);
                    md.searched_buffer = Some(buf.id);
                    md
                },
            )
        }
    };
    let Some(md) = md_opt else {
        return if noerror {
            Ok(None)
        } else {
            Err(format!("Search failed: \"{}\"", pattern))
        };
    };
    let (match_start, _) = md
        .groups
        .first()
        .copied()
        .flatten()
        .expect("a successful search always records the whole match as group 0");
    buf.goto_byte(match_start);
    *match_data = Some(md);
    Ok(Some(match_start))
}
/// Searches forward from point for a regular expression given as a `LispString`.
///
/// The pattern is always compiled as a regex (there is no literal shortcut
/// here), targeting the buffer's multibyte setting. The search starts at
/// `buf.pt_byte` and may not extend past `bound` (clamped to `buf.zv_byte`).
/// On success point moves to the end of the match, `match_data` receives
/// buffer byte positions, and the match end is returned.
///
/// # Errors
/// Returns the compiler's `"Invalid regexp: …"` error for a bad pattern.
/// When nothing is found (or point already lies past the limit) the result is
/// `Ok(None)` if `noerror` is set and `Err("Search failed")` otherwise.
pub(crate) fn re_search_forward_lisp_with_posix(
    buf: &mut Buffer,
    pattern: &LispString,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let start = buf.pt_byte;
    let limit = bound.unwrap_or(buf.zv_byte).min(buf.zv_byte);
    if start > limit {
        if noerror {
            return Ok(None);
        }
        return Err("Search failed".to_string());
    }
    // Work on a copy of the accessible region; positions relative to it are
    // suffixed `_rel`.
    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = start - region_start;
    let limit_rel = limit - region_start;
    let compiled = compile_lisp_pattern_with_posix(pattern, case_fold, posix, buf.get_multibyte())?;
    let syn = BufferSyntaxLookup {
        syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
    };
    let found = regex_emacs::re_search(
        &compiled,
        &text[..limit_rel],
        start_rel,
        (limit_rel - start_rel) as isize,
        &syn,
        start_rel,
    );
    let Some((_pos, regs)) = found else {
        return if noerror {
            Ok(None)
        } else {
            Err("Search failed".to_string())
        };
    };
    let mut md = buffer_match_data_from_registers(&regs, region_start);
    md.searched_buffer = Some(buf.id);
    let (_, match_end) = md
        .groups
        .first()
        .copied()
        .flatten()
        .expect("a successful search always records the whole match as group 0");
    buf.goto_byte(match_end);
    *match_data = Some(md);
    Ok(Some(match_end))
}
/// Searches backward from point for a regular expression given as a `LispString`.
///
/// The pattern is always compiled as a regex (there is no literal shortcut
/// here), targeting the buffer's multibyte setting. Candidate match starts
/// range from `buf.pt_byte` down to `bound` (raised to at least
/// `buf.begv_byte`). On success point moves to the start of the match,
/// `match_data` receives buffer byte positions, and the match start is returned.
///
/// # Errors
/// Returns the compiler's `"Invalid regexp: …"` error for a bad pattern.
/// When nothing is found (or point already lies before the limit) the result
/// is `Ok(None)` if `noerror` is set and `Err("Search failed")` otherwise.
pub(crate) fn re_search_backward_lisp_with_posix(
    buf: &mut Buffer,
    pattern: &LispString,
    bound: Option<usize>,
    noerror: bool,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let end = buf.pt_byte;
    let limit = bound.unwrap_or(buf.begv_byte).max(buf.begv_byte);
    if end < limit {
        if noerror {
            return Ok(None);
        }
        return Err("Search failed".to_string());
    }
    // Work on a copy of the accessible region; positions relative to it are
    // suffixed `_rel`.
    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = end - region_start;
    let limit_rel = limit - region_start;
    let compiled = compile_lisp_pattern_with_posix(pattern, case_fold, posix, buf.get_multibyte())?;
    let syn = BufferSyntaxLookup {
        syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
    };
    // A negative range asks the engine to scan toward the limit.
    let found = regex_emacs::re_search(
        &compiled,
        &text,
        start_rel,
        -((start_rel - limit_rel) as isize),
        &syn,
        start_rel,
    );
    let Some((_pos, regs)) = found else {
        return if noerror {
            Ok(None)
        } else {
            Err("Search failed".to_string())
        };
    };
    let mut md = buffer_match_data_from_registers(&regs, region_start);
    md.searched_buffer = Some(buf.id);
    let (match_start, _) = md
        .groups
        .first()
        .copied()
        .flatten()
        .expect("a successful search always records the whole match as group 0");
    buf.goto_byte(match_start);
    *match_data = Some(md);
    Ok(Some(match_start))
}
/// Tests whether the text at point in `buf` matches `pattern`.
///
/// Convenience entry point for [`looking_at_with_posix`] with the `posix`
/// flag switched off; see that function for the match-data contract.
pub fn looking_at(
    buf: &Buffer,
    pattern: &str,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<bool, String> {
    let posix = false;
    looking_at_with_posix(buf, pattern, case_fold, posix, match_data)
}
/// Tests whether the text starting at point in `buf` matches `pattern`.
///
/// Only the accessible region (`begv_byte..zv_byte`) is examined. When the
/// pattern matches, `match_data` is replaced with byte-position match data
/// tagged with `buf.id` and `Ok(true)` is returned; otherwise `match_data`
/// is left untouched and `Ok(false)` is returned. Point is never moved.
///
/// `case_fold` and `posix` are forwarded to the pattern compiler.
///
/// # Errors
///
/// Propagates pattern compilation errors.
pub fn looking_at_with_posix(
    buf: &Buffer,
    pattern: &str,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<bool, String> {
    let point = buf.pt_byte;
    if point > buf.zv_byte {
        return Ok(false);
    }

    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = point - region_start;

    let md = match compile_search_pattern_with_posix(pattern, case_fold, posix)? {
        CompiledSearchPattern::Literal(literal) => {
            let multibyte = buf.get_multibyte();
            let literal_bytes = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
                &literal, multibyte,
            );
            // A literal "looks at" point only if its first occurrence in the
            // remaining text sits exactly at offset zero.
            let anchored =
                literal_find_emacs_bytes(&text[start_rel..], &literal_bytes, multibyte, case_fold)
                    .is_some_and(|(match_start, _)| match_start == 0);
            if !anchored {
                return Ok(false);
            }
            // NOTE(review): the match end is derived from the pattern length,
            // not from the end offset reported by the finder -- confirm this
            // holds when `case_fold` pairs characters of different widths.
            MatchData {
                groups: vec![Some((point, point + literal_bytes.len()))],
                searched_string: None,
                searched_buffer: Some(buf.id),
                buffer_positions_are_bytes: true,
            }
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = BufferSyntaxLookup {
                syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
            };
            let Some((_end, regs)) =
                regex_emacs::re_match(&cp, &text, start_rel, text.len(), &syn, start_rel)
            else {
                return Ok(false);
            };
            // Registers are region-relative; rebase them onto buffer bytes.
            let mut md = buffer_match_data_from_registers(&regs, region_start);
            md.searched_buffer = Some(buf.id);
            md
        }
    };

    *match_data = Some(md);
    Ok(true)
}
/// Tests whether the text starting at point in `buf` matches the Lisp-string
/// regexp `pattern`.
///
/// Only the accessible region (`begv_byte..zv_byte`) is examined. On a match
/// `match_data` is replaced with byte-position match data tagged with
/// `buf.id` and `Ok(true)` is returned; otherwise `match_data` is left
/// untouched and `Ok(false)` is returned. Point is never moved.
///
/// `case_fold` and `posix` are forwarded to the pattern compiler together
/// with the buffer's multibyte flag.
///
/// # Errors
///
/// Propagates pattern compilation errors.
pub(crate) fn looking_at_lisp_with_posix(
    buf: &Buffer,
    pattern: &LispString,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<bool, String> {
    let point = buf.pt_byte;
    if point > buf.zv_byte {
        return Ok(false);
    }

    let region_start = buf.begv_byte;
    let mut text = Vec::new();
    buf.text
        .copy_emacs_bytes_to(region_start, buf.zv_byte, &mut text);
    let start_rel = point - region_start;

    let compiled = compile_lisp_pattern_with_posix(pattern, case_fold, posix, buf.get_multibyte())?;
    let syn = BufferSyntaxLookup {
        syntax_table: crate::emacs_core::syntax::SyntaxTable::for_buffer(buf),
    };

    let Some((_end, regs)) =
        regex_emacs::re_match(&compiled, &text, start_rel, text.len(), &syn, start_rel)
    else {
        return Ok(false);
    };

    // Registers are region-relative; rebase them onto buffer byte positions.
    let mut md = buffer_match_data_from_registers(&regs, region_start);
    md.searched_buffer = Some(buf.id);
    *match_data = Some(md);
    Ok(true)
}
/// Tests whether `string` matches `pattern` starting at its very beginning.
///
/// On a match `match_data` is replaced with match data built by
/// `string_char_match_data` over an owned copy of `string`, and `Ok(true)`
/// is returned. Otherwise `match_data` is left untouched and `Ok(false)` is
/// returned.
///
/// # Errors
///
/// Propagates pattern compilation errors.
pub fn looking_at_string(
    pattern: &str,
    string: &str,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<bool, String> {
    // Both pattern kinds first produce byte-based match data; the conversion
    // for the caller happens once, below.
    let byte_md = match compile_search_pattern(pattern, case_fold)? {
        CompiledSearchPattern::Literal(literal) => {
            let anchored = literal_find(string, &literal, case_fold)
                .is_some_and(|(match_start, _)| match_start == 0);
            if !anchored {
                return Ok(false);
            }
            single_group_match_data(0, literal.len())
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = DefaultSyntaxLookup;
            let text_bytes = string.as_bytes();
            let Some((_end, regs)) =
                regex_emacs::re_match(&cp, text_bytes, 0, text_bytes.len(), &syn, 0)
            else {
                return Ok(false);
            };
            match_data_from_registers(&regs, 0)
        }
    };

    *match_data = Some(string_char_match_data(
        SearchedString::Owned(LispString::from_utf8(string)),
        byte_md,
    ));
    Ok(true)
}
/// Searches `string` for `pattern` from byte offset `start`, with explicit
/// control over case folding.
///
/// Convenience entry point for
/// [`string_match_full_with_case_fold_and_posix`] with `posix` switched off.
pub fn string_match_full_with_case_fold(
    pattern: &str,
    string: &str,
    start: usize,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let posix = false;
    string_match_full_with_case_fold_and_posix(pattern, string, start, case_fold, posix, match_data)
}
/// Searches `string` for `pattern` from byte offset `start`, with explicit
/// `case_fold` and `posix` flags.
///
/// The searched text recorded alongside the match is an owned copy of
/// `string`; the actual work is done by
/// [`string_match_full_with_case_fold_source_posix`].
pub fn string_match_full_with_case_fold_and_posix(
    pattern: &str,
    string: &str,
    start: usize,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let searched = SearchedString::Owned(LispString::from_utf8(string));
    string_match_full_with_case_fold_source_posix(
        pattern, string, searched, start, case_fold, posix, match_data,
    )
}
/// Searches the Lisp string `string` for `pattern` from byte offset `start`,
/// handing `searched_string` on to the match-data builder.
///
/// Convenience entry point for
/// [`string_match_full_with_case_fold_source_lisp_posix`] with `posix`
/// switched off.
pub(crate) fn string_match_full_with_case_fold_source_lisp(
    pattern: &str,
    string: &crate::heap_types::LispString,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let posix = false;
    string_match_full_with_case_fold_source_lisp_posix(
        pattern,
        string,
        searched_string,
        start,
        case_fold,
        posix,
        match_data,
    )
}
/// Searches the Lisp string `string` for the UTF-8 `pattern` from byte offset
/// `start`.
///
/// The pattern is first wrapped in a [`LispString`] so the search can be
/// delegated to
/// [`string_match_full_with_case_fold_source_lisp_pattern_posix`].
pub(crate) fn string_match_full_with_case_fold_source_lisp_posix(
    pattern: &str,
    string: &crate::heap_types::LispString,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let lisp_pattern = LispString::from_utf8(pattern);
    string_match_full_with_case_fold_source_lisp_pattern_posix(
        &lisp_pattern,
        string,
        searched_string,
        start,
        case_fold,
        posix,
        match_data,
    )
}
/// Searches the Lisp string `string` for the Lisp-string regexp `pattern`,
/// scanning forward from byte offset `start`.
///
/// The pattern is compiled for the multibyteness of `string`. On a match the
/// byte registers are passed, together with `searched_string`, through
/// `string_char_match_data`; the result replaces `match_data` and the start
/// of its group 0 is returned. Without a match, or when `start` lies past
/// the end of the string, `Ok(None)` is returned and `match_data` is left
/// untouched.
///
/// # Errors
///
/// Propagates pattern compilation errors.
pub(crate) fn string_match_full_with_case_fold_source_lisp_pattern_posix(
    pattern: &LispString,
    string: &crate::heap_types::LispString,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    if start > string.byte_len() {
        return Ok(None);
    }

    let compiled =
        compile_lisp_pattern_with_posix(pattern, case_fold, posix, string.is_multibyte())?;
    let syn = DefaultSyntaxLookup;
    let text_bytes = string.as_bytes();
    // Allow the match to begin anywhere from `start` through the end.
    let range = (text_bytes.len() - start) as isize;

    let Some((_pos, regs)) =
        regex_emacs::re_search(&compiled, text_bytes, start, range, &syn, start)
    else {
        return Ok(None);
    };

    let char_md = string_char_match_data(searched_string, match_data_from_registers(&regs, 0));
    let result_pos = char_md.groups[0].unwrap().0;
    *match_data = Some(char_md);
    Ok(Some(result_pos))
}
/// Searches `string` for `pattern` from byte offset `start`, handing
/// `searched_string` on to the match-data builder.
///
/// Convenience entry point for
/// [`string_match_full_with_case_fold_source_posix`] with `posix` switched
/// off.
pub(crate) fn string_match_full_with_case_fold_source(
    pattern: &str,
    string: &str,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let posix = false;
    string_match_full_with_case_fold_source_posix(
        pattern,
        string,
        searched_string,
        start,
        case_fold,
        posix,
        match_data,
    )
}
/// Searches `string` for `pattern` from byte offset `start`.
///
/// A `start` beyond the end of `string` yields `Ok(None)` before the pattern
/// is even compiled. Otherwise the pattern is compiled with the given
/// `case_fold` / `posix` flags and the search itself is carried out by
/// `string_match_full_with_case_fold_source_compiled`.
///
/// # Errors
///
/// Propagates pattern compilation errors.
pub(crate) fn string_match_full_with_case_fold_source_posix(
    pattern: &str,
    string: &str,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    posix: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    if string.len() < start {
        return Ok(None);
    }
    let compiled = compile_search_pattern_with_posix(pattern, case_fold, posix)?;
    string_match_full_with_case_fold_source_compiled(
        compiled,
        string,
        searched_string,
        start,
        case_fold,
        match_data,
    )
}
/// Runs an already compiled search pattern over `string`, scanning forward
/// from byte offset `start`.
///
/// On a match the byte-based result is passed, together with
/// `searched_string`, through `string_char_match_data`; the outcome replaces
/// `match_data` and the start of its group 0 is returned. Without a match
/// `Ok(None)` is returned and `match_data` is left untouched.
///
/// `case_fold` only affects literal patterns here; a compiled regexp is
/// searched as compiled.
///
/// A `start` past the end of `string` -- or, for literal patterns, one that
/// falls inside a multi-byte character -- is reported as "no match" instead
/// of panicking on the slice or underflowing the search range.
fn string_match_full_with_case_fold_source_compiled(
    compiled: CompiledSearchPattern,
    string: &str,
    searched_string: SearchedString,
    start: usize,
    case_fold: bool,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    if start > string.len() {
        return Ok(None);
    }

    // Both pattern kinds first produce byte-based match data; the conversion
    // for the caller happens once, below.
    let byte_md = match compiled {
        CompiledSearchPattern::Literal(literal) => {
            // `get` rejects offsets that are not on a character boundary.
            let Some(tail) = string.get(start..) else {
                return Ok(None);
            };
            let Some((match_start, match_end)) = literal_find(tail, &literal, case_fold) else {
                return Ok(None);
            };
            // The finder reports offsets within `tail`; shift them back.
            single_group_match_data(start + match_start, start + match_end)
        }
        CompiledSearchPattern::Emacs(cp) => {
            let syn = DefaultSyntaxLookup;
            let text_bytes = string.as_bytes();
            // Allow the match to begin anywhere from `start` through the end.
            let range = (text_bytes.len() - start) as isize;
            let Some((_pos, regs)) =
                regex_emacs::re_search(&cp, text_bytes, start, range, &syn, start)
            else {
                return Ok(None);
            };
            match_data_from_registers(&regs, 0)
        }
    };

    let char_md = string_char_match_data(searched_string, byte_md);
    let result_pos = char_md.groups[0].unwrap().0;
    *match_data = Some(char_md);
    Ok(Some(result_pos))
}
/// Searches `string` for `pattern` from byte offset `start` with case
/// folding enabled.
///
/// Convenience entry point for [`string_match_full_with_case_fold`].
pub fn string_match_full(
    pattern: &str,
    string: &str,
    start: usize,
    match_data: &mut Option<MatchData>,
) -> Result<Option<usize>, String> {
    let case_fold = true;
    string_match_full_with_case_fold(pattern, string, start, case_fold, match_data)
}
/// Replaces the text of group `subexp` of the last match in `buf` with
/// `newtext`.
///
/// Convenience entry point for [`replace_match_buffer_with_syntax`] with
/// `case_symbols_as_words` switched off.
pub fn replace_match_buffer(
    buf: &mut Buffer,
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
) -> Result<(), String> {
    let case_symbols_as_words = false;
    replace_match_buffer_with_syntax(
        buf,
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        case_symbols_as_words,
    )
}
/// Replaces the text of group `subexp` of the last match in `buf`.
///
/// The region to replace and the replacement text are worked out by
/// [`compute_buffer_replacement_with_syntax`]; this function then moves point
/// to the start of that region, deletes it and inserts the replacement
/// there.
///
/// # Errors
///
/// Propagates any error from computing the replacement; the buffer is not
/// modified in that case.
pub fn replace_match_buffer_with_syntax(
    buf: &mut Buffer,
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
    case_symbols_as_words: bool,
) -> Result<(), String> {
    let plan = compute_buffer_replacement_with_syntax(
        buf,
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        case_symbols_as_words,
    )?;
    let (region_start, region_end, new_text) = plan;

    buf.goto_byte(region_start);
    buf.delete_region(region_start, region_end);
    buf.insert_lisp_string(&new_text);
    Ok(())
}
/// Works out what a `replace-match` on `buf` would do, without modifying the
/// buffer.
///
/// Returns `(start, end, replacement)`: the buffer byte range covered by
/// group `subexp` of `match_data`, and the text that should take its place,
/// encoded for the buffer's multibyteness.
///
/// The replacement itself (template expansion unless `literal`, case
/// adaptation unless `fixedcase`) is produced by
/// `compute_replacement_with_syntax` using the buffer's syntax table.
///
/// # Errors
///
/// Returns [`REPLACE_MATCH_SUBEXP_MISSING`] when there is no match data or
/// group `subexp` did not take part in the match, and propagates errors from
/// building the replacement text.
pub(crate) fn compute_buffer_replacement_with_syntax(
    buf: &Buffer,
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
    case_symbols_as_words: bool,
) -> Result<(usize, usize, crate::heap_types::LispString), String> {
    // Validate the request before paying for a copy of the buffer text.
    let Some(md) = match_data else {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    };
    let Some((match_start, match_end)) = md.groups.get(subexp).copied().flatten() else {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    };

    let source = crate::emacs_core::string_escape::emacs_bytes_to_storage_string(
        &buf.buffer_substring_bytes(0, buf.total_bytes()),
        buf.get_multibyte(),
    );
    let buf_syntax = crate::emacs_core::syntax::SyntaxTable::for_buffer(buf);

    // Translate the recorded group bounds into buffer byte positions; the
    // unit they were recorded in depends on where the match data came from.
    let (buffer_start, buffer_end) = if md.searched_string.is_some() {
        (
            buf.text.char_to_emacs_byte(match_start),
            buf.text.char_to_emacs_byte(match_end),
        )
    } else if md.searched_buffer.is_some() && !md.buffer_positions_are_bytes {
        // Character positions recorded one-based; shift to zero-based first.
        (
            buf.text.char_to_emacs_byte(match_start.saturating_sub(1)),
            buf.text.char_to_emacs_byte(match_end.saturating_sub(1)),
        )
    } else {
        (match_start, match_end)
    };

    // Only the replacement text is needed here: the storage-string offsets
    // it reports are superseded by the buffer positions computed above.
    let (_storage_start, _storage_end, replacement) = compute_replacement_with_syntax(
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        &source,
        Some(&buf_syntax),
        case_symbols_as_words,
    )?;

    let replacement_bytes = crate::emacs_core::string_escape::storage_string_to_buffer_bytes(
        &replacement,
        buf.get_multibyte(),
    );
    let replacement = if buf.get_multibyte() {
        crate::heap_types::LispString::from_emacs_bytes(replacement_bytes)
    } else {
        crate::heap_types::LispString::from_unibyte(replacement_bytes)
    };
    Ok((buffer_start, buffer_end, replacement))
}
/// Returns a copy of `source` in which group `subexp` of `match_data` has
/// been replaced by `newtext`.
///
/// Convenience entry point for [`replace_match_string_with_syntax`] without
/// a syntax table and with `case_symbols_as_words` switched off.
pub fn replace_match_string(
    source: &str,
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
) -> Result<String, String> {
    let syntax_table = None;
    let case_symbols_as_words = false;
    replace_match_string_with_syntax(
        source,
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        syntax_table,
        case_symbols_as_words,
    )
}
/// Returns a copy of `source` in which group `subexp` of `match_data` has
/// been replaced by the expansion of `newtext`.
///
/// The byte range to replace and the replacement text come from
/// `compute_replacement_with_syntax`; the result is the text before that
/// range, the replacement, and the text after it.
///
/// # Errors
///
/// Propagates errors from computing the replacement, and returns
/// [`REPLACE_MATCH_SUBEXP_MISSING`] when the computed range does not fit
/// inside `source`.
pub fn replace_match_string_with_syntax(
    source: &str,
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
    syntax_table: Option<&crate::emacs_core::syntax::SyntaxTable>,
    case_symbols_as_words: bool,
) -> Result<String, String> {
    let (byte_start, byte_end, replacement) = compute_replacement_with_syntax(
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        source,
        syntax_table,
        case_symbols_as_words,
    )?;

    let range_is_valid = byte_start <= byte_end && byte_end <= source.len();
    if !range_is_valid {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    }

    // Stitch prefix + replacement + suffix into a single allocation.
    let prefix = &source[..byte_start];
    let mut result = String::with_capacity(source.len() + replacement.len());
    result.push_str(prefix);
    result.push_str(&replacement);
    result.push_str(&source[byte_end..]);
    Ok(result)
}
/// Converts a zero-based character index into `s` to the byte offset at
/// which that character starts.
///
/// An index at or past the number of characters in `s` maps to `s.len()`.
pub fn char_pos_to_byte(s: &str, char_pos: usize) -> usize {
    match s.char_indices().nth(char_pos) {
        Some((offset, _)) => offset,
        // Ran off the end: clamp to the end of the string.
        None => s.len(),
    }
}
/// Computes the byte range in `source` to replace and the replacement text
/// for group `subexp` of `match_data`.
///
/// Convenience entry point for [`compute_replacement_with_syntax`] without a
/// syntax table and with `case_symbols_as_words` switched off.
fn compute_replacement(
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
    source: &str,
) -> Result<(usize, usize, String), String> {
    let syntax_table = None;
    let case_symbols_as_words = false;
    compute_replacement_with_syntax(
        newtext,
        fixedcase,
        literal,
        subexp,
        match_data,
        source,
        syntax_table,
        case_symbols_as_words,
    )
}
/// Computes the byte range in `source` to replace and the replacement text
/// for group `subexp` of `match_data`.
///
/// Returns `(byte_start, byte_end, replacement)` where the two offsets index
/// into `source`.
///
/// * Unless `literal` is set, `newtext` is a template expanded by
///   `build_replacement`.
/// * Unless `fixedcase` is set, the replacement's case is adapted to the
///   matched text, consulting `syntax_table` / `case_symbols_as_words`.
///
/// # Errors
///
/// Returns [`REPLACE_MATCH_SUBEXP_MISSING`] when there is no match data, the
/// group did not take part in the match, or its range does not fit inside
/// `source`; propagates template errors from `build_replacement`.
fn compute_replacement_with_syntax(
    newtext: &str,
    fixedcase: bool,
    literal: bool,
    subexp: usize,
    match_data: &Option<MatchData>,
    source: &str,
    syntax_table: Option<&crate::emacs_core::syntax::SyntaxTable>,
    case_symbols_as_words: bool,
) -> Result<(usize, usize, String), String> {
    let Some(md) = match_data else {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    };
    let Some((match_start, match_end)) = md.groups.get(subexp).copied().flatten() else {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    };

    // The unit of the recorded positions depends on where the match data
    // came from; normalise every flavour to a byte offset into `source`.
    let string_positions_are_chars = md.searched_string.is_some();
    let buffer_positions_are_lisp_chars =
        md.searched_buffer.is_some() && !md.buffer_positions_are_bytes;
    let uses_buffer_byte_positions = md.uses_buffer_byte_positions();
    let to_source_byte = |pos: usize| -> usize {
        if string_positions_are_chars {
            char_pos_to_byte(source, pos)
        } else if buffer_positions_are_lisp_chars {
            // Recorded one-based; shift to a zero-based character index.
            char_pos_to_byte(source, pos.saturating_sub(1))
        } else if uses_buffer_byte_positions {
            crate::emacs_core::string_escape::storage_logical_byte_to_storage_byte(source, pos)
        } else {
            pos
        }
    };
    let byte_start = to_source_byte(match_start);
    let byte_end = to_source_byte(match_end);
    if byte_start > byte_end || byte_end > source.len() {
        return Err(REPLACE_MATCH_SUBEXP_MISSING.to_string());
    }

    let expanded = if literal {
        newtext.to_string()
    } else {
        build_replacement(newtext, md, source, string_positions_are_chars)?
    };
    let replacement = if fixedcase {
        expanded
    } else {
        apply_match_case_with_syntax(
            &expanded,
            &source[byte_start..byte_end],
            syntax_table,
            case_symbols_as_words,
        )
    };
    Ok((byte_start, byte_end, replacement))
}
/// Expands a `replace-match` style `template` against the groups in `md`.
///
/// Recognised escapes:
///
/// * `\&` inserts the text of the whole match (group 0),
/// * `\1` .. `\9` insert the text of the corresponding group,
/// * `\\` inserts a single backslash,
/// * `\?` is copied through unchanged as `\?`.
///
/// A group that is absent, unmatched, or whose range does not fit inside
/// `source` expands to nothing. A backslash that is the final character of
/// the template is copied through literally.
///
/// `char_positions` says the group bounds in `md` are character indices into
/// `source`; otherwise they are byte offsets, converted through
/// `storage_logical_byte_to_storage_byte` when
/// `md.uses_buffer_byte_positions()` is true.
///
/// # Errors
///
/// Returns an error for a backslash followed by any other character.
fn build_replacement(
    template: &str,
    md: &MatchData,
    source: &str,
    char_positions: bool,
) -> Result<String, String> {
    const INVALID_BACKSLASH_MSG: &str = "Invalid use of `\\' in replacement text";

    /// Converts one recorded group boundary into a byte offset in `source`.
    fn boundary_to_byte(
        source: &str,
        pos: usize,
        char_positions: bool,
        emacs_byte_positions: bool,
    ) -> usize {
        if char_positions {
            char_pos_to_byte(source, pos)
        } else if emacs_byte_positions {
            crate::emacs_core::string_escape::storage_logical_byte_to_storage_byte(source, pos)
        } else {
            pos
        }
    }

    /// Text of group `index`, or `None` when the group is missing,
    /// unmatched, or does not describe a valid range of `source`.
    fn group_text<'s>(
        source: &'s str,
        md: &MatchData,
        index: usize,
        char_positions: bool,
        emacs_byte_positions: bool,
    ) -> Option<&'s str> {
        let (start, end) = md.groups.get(index).copied().flatten()?;
        let lo = boundary_to_byte(source, start, char_positions, emacs_byte_positions);
        let hi = boundary_to_byte(source, end, char_positions, emacs_byte_positions);
        if lo <= hi && hi <= source.len() {
            Some(&source[lo..hi])
        } else {
            None
        }
    }

    let mut out = String::with_capacity(template.len());
    let emacs_byte_positions = md.uses_buffer_byte_positions() && !char_positions;

    let mut chars = template.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        let Some(escaped) = chars.next() else {
            // Nothing follows the backslash: keep it as ordinary text.
            out.push('\\');
            break;
        };
        match escaped {
            '&' | '1'..='9' => {
                // `\&` is group 0; a digit names its own group.
                let index = escaped.to_digit(10).map_or(0, |digit| digit as usize);
                if let Some(text) =
                    group_text(source, md, index, char_positions, emacs_byte_positions)
                {
                    out.push_str(text);
                }
            }
            '\\' => out.push('\\'),
            '?' => out.push_str("\\?"),
            _ => return Err(INVALID_BACKSLASH_MSG.to_string()),
        }
    }
    Ok(out)
}
fn apply_match_case(replacement: &str, matched: &str) -> String {
apply_replace_match_case(replacement, matched)
}
/// Adapts the case of `replacement` to that of `matched`.
///
/// Without a `syntax_table` the default `apply_replace_match_case` rules are
/// used. With one, a character counts as a word character when the table
/// classifies it as `Word`, or as `Symbol` while `case_symbols_as_words` is
/// set.
fn apply_match_case_with_syntax(
    replacement: &str,
    matched: &str,
    syntax_table: Option<&crate::emacs_core::syntax::SyntaxTable>,
    case_symbols_as_words: bool,
) -> String {
    use crate::emacs_core::casefiddle::apply_replace_match_case_with;
    use crate::emacs_core::syntax::SyntaxClass;

    let Some(table) = syntax_table else {
        return apply_replace_match_case(replacement, matched);
    };
    apply_replace_match_case_with(replacement, matched, move |ch| {
        let class = table.char_syntax(ch);
        if class == SyntaxClass::Word {
            true
        } else {
            case_symbols_as_words && class == SyntaxClass::Symbol
        }
    })
}
// Unit tests for this module are compiled only for test builds and are kept
// in the separate file `regex_test.rs`.
#[cfg(test)]
#[path = "regex_test.rs"]
mod tests;