use std::borrow::Cow;
use std::sync::LazyLock;
use memchr::memmem::Finder;
use rolldown_error::{BuildDiagnostic, InvalidOptionType, SingleBuildResult};
use rustc_hash::FxHashMap;
use crate::indexmap::FxIndexSet;
/// Opening delimiter of a hash placeholder.
const HASH_PLACEHOLDER_LEFT: &str = "!~{";
/// Closing delimiter of a hash placeholder.
const HASH_PLACEHOLDER_RIGHT: &str = "}~";
/// Combined byte length of the opening and closing delimiters.
const HASH_PLACEHOLDER_OVERHEAD: usize = HASH_PLACEHOLDER_LEFT.len() + HASH_PLACEHOLDER_RIGHT.len();
/// Largest hash length `HashPlaceholderGenerator::generate` accepts. It also sizes the
/// zero-filled normalization buffer and bounds how far the iterator looks for a closing
/// delimiter.
const MAX_HASH_SIZE: usize = 21;
/// Hash length used when `HashPlaceholderGenerator::generate` is called with `None`.
const DEFAULT_HASH_SIZE: usize = 8;
/// Lazily constructed substring searcher for [`HASH_PLACEHOLDER_LEFT`], shared so callers
/// do not have to rebuild it for every scan.
pub static HASH_PLACEHOLDER_LEFT_FINDER: LazyLock<Finder<'static>> =
LazyLock::new(|| Finder::new(HASH_PLACEHOLDER_LEFT));
/// Returns `true` when `s` is exactly one hash placeholder: the opening delimiter, then
/// 1 to 17 bytes drawn from `[0-9a-zA-Z_$]`, then the closing delimiter.
fn is_hash_placeholder(s: &str) -> bool {
  let Some(inner) = s
    .strip_prefix(HASH_PLACEHOLDER_LEFT)
    .and_then(|rest| rest.strip_suffix(HASH_PLACEHOLDER_RIGHT))
  else {
    return false;
  };
  if !(1..=17).contains(&inner.len()) {
    return false;
  }
  inner.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$'))
}
/// Iterator over the hash placeholders contained in a string.
///
/// Created by [`find_hash_placeholders`].
pub struct HashPlaceholderIter<'a> {
  /// Text being scanned.
  s: &'a str,
  /// Searcher used to locate the opening delimiter.
  finder: &'a Finder<'static>,
  /// Byte offset at which the next search begins.
  start: usize,
}

impl<'a> Iterator for HashPlaceholderIter<'a> {
  /// `(start, end, placeholder)`: byte offsets into the scanned string (`end` exclusive)
  /// and the placeholder text itself, delimiters included.
  type Item = (usize, usize, &'a str);

  /// Returns the next valid placeholder, or `None` once no opening delimiter remains.
  fn next(&mut self) -> Option<Self::Item> {
    let s: &'a str = self.s;
    // Search on bytes: the bounded window below may end in the middle of a multi-byte
    // character, which would panic if sliced as `str`.
    let bytes = s.as_bytes();
    loop {
      let left_pos = self.start + self.finder.find(&bytes[self.start..])?;
      // A valid placeholder is short, so only a bounded window is searched for the
      // closing delimiter.
      let search_end = (left_pos + MAX_HASH_SIZE + HASH_PLACEHOLDER_OVERHEAD).min(bytes.len());
      let closing =
        memchr::memmem::find(&bytes[left_pos..search_end], HASH_PLACEHOLDER_RIGHT.as_bytes());
      if let Some(offset) = closing {
        let right_pos = left_pos + offset + HASH_PLACEHOLDER_RIGHT.len();
        // Both ends sit next to ASCII delimiter bytes, so they are char boundaries.
        let placeholder = &s[left_pos..right_pos];
        if is_hash_placeholder(placeholder) {
          self.start = right_pos;
          return Some((left_pos, right_pos, placeholder));
        }
      }
      // Rejected candidate: resume right after this opening delimiter rather than after
      // the closing one, so a valid placeholder that begins inside the rejected span
      // (e.g. `!~{ab!~{000}~`) is still found.
      self.start = left_pos + HASH_PLACEHOLDER_LEFT.len();
    }
  }
}
/// Creates an iterator over every hash placeholder in `s`, scanning from the beginning.
///
/// `finder` is used to locate the opening delimiter; pass a searcher built for it, such
/// as [`HASH_PLACEHOLDER_LEFT_FINDER`].
pub fn find_hash_placeholders<'a>(
  s: &'a str,
  finder: &'a Finder<'static>,
) -> HashPlaceholderIter<'a> {
  let start = 0;
  HashPlaceholderIter { s, finder, start }
}
/// Radix used by [`to_base64`].
const BASE: u32 = 64;
/// Digit alphabet used by [`to_base64`], indexed by digit value.
const CHARS: &[u8; BASE as usize] =
  b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$";

/// Encodes `value` as a base-64 numeral using the [`CHARS`] alphabet.
///
/// The most significant digit comes first and there are no leading zero digits, so `0`
/// encodes as `"0"`.
pub fn to_base64(mut value: u32) -> String {
  // A u32 has 32 bits and each digit carries 6, so at most 6 digits are needed.
  let mut digits = [0u8; 6];
  let mut cursor = digits.len();
  // Fill from the back so the digits end up most-significant first.
  loop {
    cursor -= 1;
    digits[cursor] = CHARS[(value % BASE) as usize];
    value /= BASE;
    if value == 0 {
      break;
    }
  }
  // Every byte in `CHARS` is ASCII, so the byte-to-char conversion is lossless.
  digits[cursor..].iter().map(|&digit| char::from(digit)).collect()
}
/// Produces distinct hash placeholders by embedding an increasing counter.
#[derive(Debug, Default)]
pub struct HashPlaceholderGenerator {
  /// Counter value embedded in the next generated placeholder.
  next_index: u32,
}

impl HashPlaceholderGenerator {
  /// Generates the next placeholder, exactly `len` bytes long including delimiters.
  ///
  /// `len` falls back to `DEFAULT_HASH_SIZE` when `None`. `pattern_name` is only used to
  /// label the diagnostics below. The counter advances only when a placeholder is
  /// returned.
  ///
  /// # Errors
  ///
  /// - `HashLengthTooLong` when `len` exceeds `MAX_HASH_SIZE`.
  /// - `HashLengthTooShort` when `len` cannot hold both delimiters plus the base-64
  ///   encoded counter.
  pub fn generate(&mut self, len: Option<usize>, pattern_name: &str) -> SingleBuildResult<String> {
    let len = len.unwrap_or(DEFAULT_HASH_SIZE);
    if len > MAX_HASH_SIZE {
      return Err(BuildDiagnostic::invalid_option(InvalidOptionType::HashLengthTooLong {
        pattern_name: pattern_name.to_string(),
        received: len,
        max: MAX_HASH_SIZE,
      }));
    }

    let index_in_base64 = to_base64(self.next_index);
    let min_len = index_in_base64.len() + HASH_PLACEHOLDER_OVERHEAD;
    // Number of zero digits needed to pad the counter out to the requested length.
    let Some(padding) = len.checked_sub(min_len) else {
      return Err(BuildDiagnostic::invalid_option(InvalidOptionType::HashLengthTooShort {
        pattern_name: pattern_name.to_string(),
        received: len,
        min: min_len,
        chunk_count: self.next_index + 1,
      }));
    };

    // Allocate only after validation; the finished placeholder is exactly `len` bytes.
    let mut placeholder = String::with_capacity(len);
    placeholder.push_str(HASH_PLACEHOLDER_LEFT);
    placeholder.extend(std::iter::repeat_n('0', padding));
    placeholder.push_str(&index_in_base64);
    placeholder.push_str(HASH_PLACEHOLDER_RIGHT);
    self.next_index += 1;
    Ok(placeholder)
  }
}
/// Replaces each hash placeholder in `source` with its entry in
/// `final_hashes_by_placeholder`.
///
/// Placeholders missing from the map are copied through unchanged. When `source`
/// contains no placeholder at all, it is returned borrowed and nothing is allocated.
#[expect(clippy::implicit_hasher)]
pub fn replace_placeholder_with_hash<'a>(
  source: &'a str,
  final_hashes_by_placeholder: &FxHashMap<String, &'a str>,
  finder: &'a Finder<'static>,
) -> Cow<'a, str> {
  let mut matches = find_hash_placeholders(source, finder);
  let Some(first) = matches.next() else {
    return Cow::Borrowed(source);
  };

  let mut output = String::with_capacity(source.len());
  let mut cursor = 0;
  for (start, end, placeholder) in std::iter::once(first).chain(matches) {
    // Copy the text between the previous placeholder and this one.
    output.push_str(&source[cursor..start]);
    let replacement = match final_hashes_by_placeholder.get(placeholder) {
      Some(hash) => *hash,
      None => placeholder,
    };
    output.push_str(replacement);
    cursor = end;
  }
  // Copy whatever follows the last placeholder.
  output.push_str(&source[cursor..]);
  Cow::Owned(output)
}
/// Collects the hash placeholders found in `source` into a set, in the order the scan
/// yields them.
pub fn extract_hash_placeholders<'a>(
  source: &'a str,
  finder: &'a Finder<'static>,
) -> FxIndexSet<&'a str> {
  let matches = find_hash_placeholders(source, finder);
  matches.map(|found| found.2).collect()
}
/// Zero-filled bytes that stand in for the inner part of a known placeholder.
const NORMALIZED_PLACEHOLDER_INNER: [u8; MAX_HASH_SIZE] = [b'0'; MAX_HASH_SIZE];

/// Feeds `source` to `visit` in chunks, with the inner part of every known placeholder
/// replaced by `'0'` bytes of the same length.
///
/// A placeholder counts as known when `is_known_placeholder` returns `true` for it.
/// Unknown placeholders are passed through verbatim as part of the surrounding text.
/// Concatenating the visited chunks gives a byte sequence as long as `source`.
pub fn visit_with_placeholders_defaulted<F, P>(
  source: &str,
  finder: &Finder<'static>,
  is_known_placeholder: P,
  mut visit: F,
) where
  F: FnMut(&[u8]),
  P: Fn(&str) -> bool,
{
  let bytes = source.as_bytes();
  let mut cursor = 0;
  let known = find_hash_placeholders(source, finder)
    .filter(|&(_, _, candidate)| is_known_placeholder(candidate));
  for (start, end, candidate) in known {
    // Text since the previous known placeholder, including any unknown ones.
    visit(&bytes[cursor..start]);
    visit(HASH_PLACEHOLDER_LEFT.as_bytes());
    visit(&NORMALIZED_PLACEHOLDER_INNER[..candidate.len() - HASH_PLACEHOLDER_OVERHEAD]);
    visit(HASH_PLACEHOLDER_RIGHT.as_bytes());
    cursor = end;
  }
  visit(&bytes[cursor..]);
}
#[test]
fn test_facade_hash_generator() {
  // With the default length, successive calls yield zero-padded, increasing indices.
  let mut generator = HashPlaceholderGenerator::default();
  for expected in ["!~{000}~", "!~{001}~"] {
    let placeholder = generator.generate(None, "").unwrap();
    assert_eq!(placeholder, expected);
  }
}
#[test]
fn test_to_base64() {
  // (input, expected encoding)
  let cases: [(u32, &str); 7] = [
    (0, "0"),
    (1, "1"),
    (10, "a"),
    (64, "10"),
    (65, "11"),
    (128, "20"),
    (100_000_000, "5Zu40"),
  ];
  for (value, expected) in cases {
    assert_eq!(to_base64(value), expected);
  }
}
#[test]
fn test_is_hash_placeholder() {
  let accepted = [
    "!~{000}~",
    "!~{abc123}~",
    "!~{_$ABC123}~",
    // 17 characters: the longest accepted content.
    "!~{12345678901234567}~",
  ];
  for candidate in accepted {
    assert!(is_hash_placeholder(candidate));
  }

  let rejected = [
    // Empty content.
    "!~{}~",
    // 18 characters: one too many.
    "!~{123456789012345678}~",
    // '-' is not an allowed character.
    "!~{abc-123}~",
    // Incomplete opening delimiter.
    "{000}~",
    // Incomplete closing delimiter.
    "!~{000}",
    // Opening delimiter without its brace.
    "!~000}~",
  ];
  for candidate in rejected {
    assert!(!is_hash_placeholder(candidate));
  }
}
#[test]
fn test_find_hash_placeholders() {
  /// Runs the scan with the shared finder and gathers every match.
  fn find_all(s: &str) -> Vec<(usize, usize, &str)> {
    find_hash_placeholders(s, &HASH_PLACEHOLDER_LEFT_FINDER).collect()
  }

  // Placeholders surrounded by ordinary text.
  assert_eq!(
    find_all("prefix!~{000}~middle!~{abc}~suffix"),
    [(6, 14, "!~{000}~"), (20, 28, "!~{abc}~")],
  );

  // Nothing to find.
  assert!(find_all("no placeholders here").is_empty());

  // Back-to-back placeholders.
  assert_eq!(find_all("!~{000}~!~{001}~"), [(0, 8, "!~{000}~"), (8, 16, "!~{001}~")]);
}
#[test]
fn test_find_hash_placeholders_multi_byte_chars() {
  // Multi-byte characters shortly after the placeholder must not break the scan.
  let s = "import{C as e}from\"./vue.runtime.esm-bundler-!~{001}~.js\";// 中文级别文字";
  let mut placeholders = find_hash_placeholders(s, &HASH_PLACEHOLDER_LEFT_FINDER);
  let (_, _, text) = placeholders.next().expect("one placeholder should be found");
  assert_eq!(text, "!~{001}~");
  assert!(placeholders.next().is_none());
}
#[test]
fn test_visit_with_placeholders_defaulted() {
  use rustc_hash::FxHashSet;

  /// Concatenates every chunk handed to the visitor.
  fn normalized(source: &str, known: &FxHashSet<&str>) -> Vec<u8> {
    let mut out = Vec::new();
    visit_with_placeholders_defaulted(
      source,
      &HASH_PLACEHOLDER_LEFT_FINDER,
      |placeholder| known.contains(placeholder),
      |chunk| out.extend_from_slice(chunk),
    );
    out
  }

  // Every placeholder is known: each inner part becomes zeros of the same length.
  let both_known: FxHashSet<&str> = ["!~{000}~", "!~{abc12}~"].into_iter().collect();
  let output = normalized("prefix!~{000}~middle!~{abc12}~suffix", &both_known);
  assert_eq!(output, b"prefix!~{000}~middle!~{00000}~suffix");

  // An unknown placeholder is left untouched.
  let one_known: FxHashSet<&str> = ["!~{000}~"].into_iter().collect();
  let output = normalized("prefix!~{000}~middle!~{user}~suffix", &one_known);
  assert_eq!(output, b"prefix!~{000}~middle!~{user}~suffix");

  // Inputs without placeholders pass through as-is, including the empty string.
  let none_known: FxHashSet<&str> = FxHashSet::default();
  assert_eq!(normalized("no placeholders here", &none_known), b"no placeholders here");
  assert_eq!(normalized("", &none_known), b"");
}