/// Index over the bytes that can start a pattern, used to skip quickly to candidate positions
/// in a haystack.
///
/// Built by `FirstByteIndex::from_patterns`, which records both ASCII case variants of every
/// non-empty pattern's first byte.
pub(super) struct FirstByteIndex {
/// `table[b]` is `true` when byte `b` is a recorded candidate first byte.
table: [bool; 256],
/// The first (up to three) distinct candidate bytes, in the order they were discovered.
needles: [u8; 3],
/// Number of distinct candidate bytes discovered; may exceed `needles.len()`.
needle_len: usize,
/// `true` when more candidates were discovered than `needles` can hold; lookups then scan
/// the haystack against `table` instead of using `needles`.
overflow: bool,
}
impl FirstByteIndex {
    /// Builds the index from the first byte of every non-empty pattern.
    ///
    /// Both the ASCII-lowercase and ASCII-uppercase form of each first byte are recorded, and
    /// each distinct byte is counted once. Empty patterns contribute nothing.
    pub(super) fn from_patterns(patterns: &[String]) -> Self {
        let mut table = [false; 256];
        let mut needles = [0u8; 3];
        let mut needle_len = 0usize;

        let lead_bytes = patterns
            .iter()
            .filter_map(|pat| pat.as_bytes().first().copied());
        for lead in lead_bytes {
            for variant in [lead.to_ascii_lowercase(), lead.to_ascii_uppercase()] {
                let seen = &mut table[usize::from(variant)];
                if *seen {
                    continue;
                }
                *seen = true;
                // Only the first `needles.len()` distinct bytes are stored; the count keeps
                // growing so that overflow can be detected afterwards.
                if let Some(slot) = needles.get_mut(needle_len) {
                    *slot = variant;
                }
                needle_len += 1;
            }
        }

        let overflow = needle_len > needles.len();
        Self { table, needles, needle_len, overflow }
    }

    /// Returns the offset within `hay` of the first candidate byte, or `None` if there is none
    /// (including when no candidate bytes were recorded at all).
    #[inline]
    fn next(&self, hay: &[u8]) -> Option<usize> {
        if !self.overflow {
            // At most three distinct candidates: hand them to the matching memchr routine.
            let [a, b, c] = self.needles;
            return match self.needle_len {
                1 => memchr::memchr(a, hay),
                2 => memchr::memchr2(a, b, hay),
                3 => memchr::memchr3(a, b, c, hay),
                _ => None,
            };
        }
        // Too many candidates for memchr: test every byte against the lookup table.
        hay.iter().position(|&byte| self.table[usize::from(byte)])
    }
}
/// Finds the first autolink in `s` at or after byte offset `from`.
///
/// A match starts at a position that is either the start of the string or preceded by a byte
/// that is not ASCII-alphanumeric, begins with one of `patterns` (compared ASCII
/// case-insensitively), and extends over every following byte accepted by `is_url_byte`.
/// Trailing punctuation is then removed with `trim_trailing_punct`.
///
/// A candidate is rejected when nothing but the pattern itself remains, either because no URL
/// byte follows the pattern or because trimming strips everything after it (e.g. `www..`).
/// Patterns are tried in slice order and the first acceptable one wins. Empty patterns are
/// ignored.
///
/// `index` is presumably built from the same `patterns` via `FirstByteIndex::from_patterns`
/// (verify against callers); it decides which positions are inspected at all.
///
/// Returns `Some((start, end))` as byte offsets into `s` with `end` exclusive, or `None` when
/// no autolink is found.
pub(super) fn find_autolink_match(
    s: &str,
    from: usize,
    patterns: &[String],
    index: &FirstByteIndex,
) -> Option<(usize, usize)> {
    let bytes = s.as_bytes();
    let mut base = from;
    while base < bytes.len() {
        // Jump straight to the next byte that could begin a pattern; if there is none, the
        // rest of the input cannot contain a match.
        let rel = index.next(&bytes[base..])?;
        let i = base + rel;
        let is_boundary = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
        if is_boundary {
            for pat in patterns {
                let pat_bytes = pat.as_bytes();
                if pat_bytes.is_empty() {
                    continue;
                }
                let pat_end = i + pat_bytes.len();
                if pat_end > bytes.len()
                    || !bytes[i..pat_end].eq_ignore_ascii_case(pat_bytes)
                {
                    continue;
                }

                let mut url_end = pat_end;
                while url_end < bytes.len() && is_url_byte(bytes[url_end]) {
                    url_end += 1;
                }
                url_end = trim_trailing_punct(bytes, i, url_end);

                // Checked after trimming: trailing punctuation can shrink the candidate back
                // to the bare pattern (or into it), which is not a link.
                if url_end <= pat_end {
                    continue;
                }
                return Some((i, url_end));
            }
        }
        base = i + 1;
    }
    None
}
/// Returns `true` when `byte` may appear inside a URL candidate.
///
/// Every byte is accepted except space, tab, line feed, carriage return, `<`, `>`, `"`, `'`
/// and the backtick.
#[inline]
fn is_url_byte(byte: u8) -> bool {
    const TERMINATORS: &[u8] = b" \t\n\r<>\"'`";
    !TERMINATORS.contains(&byte)
}
/// Trims trailing punctuation from the URL candidate `bytes[start..end]` and returns the new
/// exclusive end offset (never less than `start`, unless `end` already was).
///
/// Working backwards from `end`:
/// * `.`, `,`, `;`, `:`, `!` and `?` are always removed;
/// * a closing `)`, `]` or `}` is removed only when the text before it does not contain more
///   openers than closers of the same kind, i.e. when it does not close a bracket opened
///   inside the candidate;
/// * any other byte stops the trimming.
///
/// Brackets are tallied once, on the first trailing closer encountered, and the tally is
/// updated as closers are removed, so the work is linear in the candidate length even for
/// long runs of trailing brackets.
fn trim_trailing_punct(bytes: &[u8], start: usize, mut end: usize) -> usize {
    // Counts openers and closers per bracket family: 0 = (), 1 = [], 2 = {}.
    fn tally(span: &[u8]) -> ([usize; 3], [usize; 3]) {
        let mut opens = [0usize; 3];
        let mut closes = [0usize; 3];
        for &b in span {
            match b {
                b'(' => opens[0] += 1,
                b')' => closes[0] += 1,
                b'[' => opens[1] += 1,
                b']' => closes[1] += 1,
                b'{' => opens[2] += 1,
                b'}' => closes[2] += 1,
                _ => {}
            }
        }
        (opens, closes)
    }

    // Bracket counts for `bytes[start..end]`, computed lazily so that candidates without a
    // trailing closer are never scanned.
    let mut counts: Option<([usize; 3], [usize; 3])> = None;

    while end > start {
        let family = match bytes[end - 1] {
            b'.' | b',' | b';' | b':' | b'!' | b'?' => {
                // Not a bracket, so the tally (if any) stays valid.
                end -= 1;
                continue;
            }
            b')' => 0,
            b']' => 1,
            b'}' => 2,
            _ => break,
        };

        let (opens, closes) = counts.get_or_insert_with(|| tally(&bytes[start..end]));
        // The tally includes the closer under inspection, so "strictly more closers than
        // openers" means the text before it has at least as many closers as openers.
        if closes[family] > opens[family] {
            closes[family] -= 1;
            end -= 1;
        } else {
            break;
        }
    }
    end
}