use std::collections::HashSet;
pub(super) fn collect_declared_underscore_names(source: &str) -> HashSet<String> {
let mut out: HashSet<String> = HashSet::new();
let mut cursor = LexCursor::new(source);
while let Some(event) = cursor.next_event() {
if let Event::Keyword(KeywordKind::Const | KeywordKind::Let | KeywordKind::Var) = event {
if let Some(next) = cursor.peek_next_code_ident()
&& next.starts_with("__")
{
out.insert(next.to_string());
}
}
}
out
}
/// Returns a copy of `source` in which every identifier token whose text is
/// in `declared` has `suffix` appended.
///
/// Everything that is not an identifier event (keywords, string-like spans,
/// comments, regex literals, punctuation and the whitespace between events)
/// is copied through verbatim. When `declared` is empty the source is
/// returned unchanged without lexing it.
pub(super) fn rewrite_identifiers(
    source: &str,
    declared: &HashSet<String>,
    suffix: &str,
) -> String {
    if declared.is_empty() {
        return source.to_owned();
    }

    let mut rewritten = String::with_capacity(source.len() + declared.len() * suffix.len());
    let mut lexer = LexCursor::new(source);
    // Byte offset up to which `source` has already been copied to `rewritten`.
    let mut copied_to = lexer.pos();

    while let Some(event) = lexer.next_event() {
        // Whatever the lexer skipped before this event (whitespace).
        rewritten.push_str(&source[copied_to..lexer.event_start]);

        match event {
            Event::Ident { start, end } => {
                let ident = &source[start..end];
                rewritten.push_str(ident);
                if declared.contains(ident) {
                    rewritten.push_str(suffix);
                }
            }
            Event::StringLike { start, end }
            | Event::Comment { start, end }
            | Event::Regex { start, end } => {
                rewritten.push_str(&source[start..end]);
            }
            // These events carry no span of their own; the cursor delimits them.
            Event::Keyword(_) | Event::Punct => {
                rewritten.push_str(&source[lexer.event_start..lexer.pos()]);
            }
        }

        copied_to = lexer.pos();
    }

    // Trailing text after the final event.
    rewritten.push_str(&source[copied_to..]);
    rewritten
}
/// Keywords the lexer reports as `Event::Keyword` rather than as identifiers.
///
/// After any of these the lexer marks the next token position as the start
/// of an expression (which is what makes a following `/` lex as a regex).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum KeywordKind {
/// The `const` declaration keyword.
Const,
/// The `let` declaration keyword.
Let,
/// The `var` declaration keyword.
Var,
/// One of `return`, `typeof`, `delete`, `void`, `throw`, `new`,
/// `instanceof`, `in`, `of`, `yield`, `await` or `case`.
ExprPrefix,
}
/// One lexical event produced by `LexCursor::next_event`.
///
/// `start`/`end` are byte offsets into the source text (`end` exclusive).
#[derive(Debug, Clone, Copy)]
enum Event {
/// An identifier. Numeric literals are also reported through this variant.
Ident { start: usize, end: usize },
/// A recognised keyword; its span is delimited by the cursor's
/// `event_start` and current position rather than carried in the event.
Keyword(KeywordKind),
/// A quoted string literal or a run of template-literal text.
StringLike { start: usize, end: usize },
/// A `//` line comment or a `/* */` block comment.
Comment { start: usize, end: usize },
/// A regular-expression literal, including any trailing flags.
Regex { start: usize, end: usize },
/// Any other token (operators, brackets, an opening backtick, ...); like
/// `Keyword`, its span is delimited by the cursor.
Punct,
}
/// A small hand-written lexer over JavaScript-like source text.
struct LexCursor<'a> {
// The source text as raw bytes.
src: &'a [u8],
// Byte offset of the next unread byte.
pos: usize,
// Byte offset at which the most recently returned event begins.
event_start: usize,
// Whether the previous token ended an expression; decides how `/` is lexed.
prev_kind: PrevKind,
// Stack of template-literal modes; empty while lexing ordinary code.
modes: Vec<Mode>,
}
/// Template-literal lexing state kept on the `LexCursor::modes` stack.
#[derive(Debug, Clone, Copy)]
enum Mode {
/// The cursor is inside the raw text part of a template literal.
TemplateText,
/// The cursor is inside a template `${ ... }` expression slot.
/// `brace_depth` is incremented on `{` and decremented on `}` (while
/// non-zero) as long as this mode is on top of the stack.
TemplateExpr { brace_depth: u32 },
}
/// Coarse classification of the previously lexed token, used to tell a
/// regex literal from a division operator when a `/` is encountered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PrevKind {
/// An expression may begin here, so a `/` starts a regex literal.
ExprStart,
/// The previous token ended an expression, so a `/` is plain punctuation.
ExprEnd,
}
impl<'a> LexCursor<'a> {
fn new(src: &'a str) -> Self {
Self {
src: src.as_bytes(),
pos: 0,
event_start: 0,
prev_kind: PrevKind::ExprStart,
modes: Vec::new(),
}
}
fn pos(&self) -> usize {
self.pos
}
fn peek(&self, off: usize) -> Option<u8> {
self.src.get(self.pos + off).copied()
}
fn skip_whitespace_and_comments(&mut self) {
loop {
match self.peek(0) {
Some(b' ' | b'\t' | b'\r' | b'\n') => {
self.pos += 1;
}
Some(b'/') if self.peek(1) == Some(b'/') => {
self.pos += 2;
while let Some(c) = self.peek(0) {
self.pos += 1;
if c == b'\n' {
break;
}
}
}
Some(b'/') if self.peek(1) == Some(b'*') => {
self.pos += 2;
while let Some(c) = self.peek(0) {
if c == b'*' && self.peek(1) == Some(b'/') {
self.pos += 2;
break;
}
self.pos += 1;
}
}
_ => break,
}
}
}
fn peek_next_code_ident(&mut self) -> Option<&'a str> {
let saved_pos = self.pos;
let saved_event_start = self.event_start;
let saved_prev_kind = self.prev_kind;
self.skip_whitespace_and_comments();
let result = if let Some((_, len)) = self.next_ident_char(true) {
let start = self.pos;
self.pos += len;
while let Some((_, n)) = self.next_ident_char(false) {
self.pos += n;
}
std::str::from_utf8(&self.src[start..self.pos]).ok()
} else {
None
};
self.pos = saved_pos;
self.event_start = saved_event_start;
self.prev_kind = saved_prev_kind;
result
}
fn next_event(&mut self) -> Option<Event> {
if matches!(self.modes.last(), Some(Mode::TemplateText)) {
return self.next_event_in_template_text();
}
while self.pos < self.src.len() {
let c = self.src[self.pos];
self.event_start = self.pos;
if matches!(c, b' ' | b'\t' | b'\r' | b'\n') {
self.pos += 1;
continue;
}
if c == b'/' && self.peek(1) == Some(b'/') {
let start = self.pos;
self.pos += 2;
while let Some(b) = self.peek(0) {
self.pos += 1;
if b == b'\n' {
break;
}
}
return Some(Event::Comment {
start,
end: self.pos,
});
}
if c == b'/' && self.peek(1) == Some(b'*') {
let start = self.pos;
self.pos += 2;
while let Some(b) = self.peek(0) {
if b == b'*' && self.peek(1) == Some(b'/') {
self.pos += 2;
break;
}
self.pos += 1;
}
return Some(Event::Comment {
start,
end: self.pos,
});
}
if c == b'/' && self.prev_kind == PrevKind::ExprStart {
let start = self.pos;
self.pos += 1; let mut in_class = false;
while let Some(b) = self.peek(0) {
match b {
b'\\' => {
self.pos += 1;
if self.pos < self.src.len() {
self.pos += 1;
}
}
b'[' => {
in_class = true;
self.pos += 1;
}
b']' if in_class => {
in_class = false;
self.pos += 1;
}
b'/' if !in_class => {
self.pos += 1;
while let Some((_, n)) = self.next_ident_char(false) {
self.pos += n;
}
break;
}
b'\n' | b'\r' => {
break;
}
_ => self.pos += 1,
}
}
self.prev_kind = PrevKind::ExprEnd;
return Some(Event::Regex {
start,
end: self.pos,
});
}
if c == b'\'' || c == b'"' {
let start = self.pos;
let quote = c;
self.pos += 1;
while let Some(b) = self.peek(0) {
match b {
b'\\' => {
self.pos += 1;
if self.pos < self.src.len() {
self.pos += 1;
}
}
_ if b == quote => {
self.pos += 1;
break;
}
_ => self.pos += 1,
}
}
self.prev_kind = PrevKind::ExprEnd;
return Some(Event::StringLike {
start,
end: self.pos,
});
}
if c == b'`' {
self.pos += 1;
self.modes.push(Mode::TemplateText);
self.prev_kind = PrevKind::ExprEnd;
return Some(Event::Punct);
}
if let Some((_, first_len)) = self.next_ident_char(true) {
let start = self.pos;
self.pos += first_len;
while let Some((_, n)) = self.next_ident_char(false) {
self.pos += n;
}
let text = &self.src[start..self.pos];
let kw = match text {
b"const" => Some(KeywordKind::Const),
b"let" => Some(KeywordKind::Let),
b"var" => Some(KeywordKind::Var),
b"return" | b"typeof" | b"delete" | b"void" | b"throw" | b"new"
| b"instanceof" | b"in" | b"of" | b"yield" | b"await" | b"case" => {
Some(KeywordKind::ExprPrefix)
}
_ => None,
};
if let Some(kw) = kw {
self.prev_kind = PrevKind::ExprStart;
return Some(Event::Keyword(kw));
}
self.prev_kind = PrevKind::ExprEnd;
return Some(Event::Ident {
start,
end: self.pos,
});
}
if c.is_ascii_digit() {
let start = self.pos;
while let Some(b) = self.peek(0) {
if b.is_ascii_alphanumeric() || b == b'.' || b == b'_' {
self.pos += 1;
} else {
break;
}
}
self.prev_kind = PrevKind::ExprEnd;
return Some(Event::Ident {
start,
end: self.pos,
});
}
self.pos += 1;
match c {
b')' | b']' => self.prev_kind = PrevKind::ExprEnd,
b'{' => {
if let Some(Mode::TemplateExpr { brace_depth }) = self.modes.last_mut() {
*brace_depth += 1;
}
self.prev_kind = PrevKind::ExprStart;
}
b'}' => {
if let Some(Mode::TemplateExpr { brace_depth }) = self.modes.last_mut() {
if *brace_depth == 0 {
self.modes.pop();
} else {
*brace_depth -= 1;
}
}
self.prev_kind = PrevKind::ExprEnd;
}
_ => self.prev_kind = PrevKind::ExprStart,
}
return Some(Event::Punct);
}
None
}
fn next_event_in_template_text(&mut self) -> Option<Event> {
self.event_start = self.pos;
let start = self.pos;
while self.pos < self.src.len() {
let b = self.src[self.pos];
match b {
b'\\' => {
self.pos += 1;
if self.pos < self.src.len() {
self.pos += 1;
}
}
b'`' => {
let end = self.pos;
self.modes.pop();
return Some(Event::StringLike { start, end });
}
b'$' if self.peek(1) == Some(b'{') => {
let end = self.pos;
let top = self.modes.last_mut().expect("mode stack invariant");
*top = Mode::TemplateExpr { brace_depth: 0 };
self.prev_kind = PrevKind::ExprStart;
return Some(Event::StringLike { start, end });
}
_ => self.pos += 1,
}
}
let end = self.pos;
self.modes.pop();
if end > start {
Some(Event::StringLike { start, end })
} else {
None
}
}
}
impl<'a> LexCursor<'a> {
    /// Tests whether the character under the cursor can appear in an
    /// identifier and, if so, returns it together with its UTF-8 byte length.
    ///
    /// With `is_start` set the character must be valid as the first
    /// character of an identifier; otherwise it must be valid as a
    /// continuation character. ASCII letters, `_` and `$` are accepted in
    /// both positions and ASCII digits only as continuations; non-ASCII
    /// characters are classified with `unicode_ident`. The cursor itself is
    /// not advanced.
    fn next_ident_char(&self, is_start: bool) -> Option<(char, usize)> {
        let lead = *self.src.get(self.pos)?;

        if lead.is_ascii() {
            let accepted = match lead {
                b'_' | b'$' => true,
                _ if is_start => lead.is_ascii_alphabetic(),
                _ => lead.is_ascii_alphanumeric(),
            };
            return accepted.then_some((char::from(lead), 1));
        }

        // Sequence length implied by the lead byte; a continuation byte
        // cannot begin a character.
        let width = match lead {
            0x80..=0xbf => return None,
            0xc0..=0xdf => 2,
            0xe0..=0xef => 3,
            _ => 4,
        };

        let bytes = self.src.get(self.pos..self.pos + width)?;
        let ch = std::str::from_utf8(bytes).ok()?.chars().next()?;

        let accepted = if is_start {
            unicode_ident::is_xid_start(ch)
        } else {
            unicode_ident::is_xid_continue(ch)
        };
        accepted.then(|| (ch, ch.len_utf8()))
    }
}
// Unit tests for declaration collection and identifier rewriting.
#[cfg(test)]
mod tests {
use super::*;
// Rewrites `source` with the fixed suffix `$7`, treating `declared` as the
// set of names to rename.
fn rewrite(source: &str, declared: &[&str]) -> String {
let set: HashSet<String> = declared.iter().map(|s| s.to_string()).collect();
rewrite_identifiers(source, &set, "$7")
}
// --- Declaration collection ---
#[test]
fn collects_simple_const_declaration() {
let set = collect_declared_underscore_names("const __v = 1;");
assert!(set.contains("__v"), "got: {:?}", set);
}
#[test]
fn collects_let_and_var() {
let set = collect_declared_underscore_names("let __a = 0; var __b;");
assert!(set.contains("__a"));
assert!(set.contains("__b"));
}
#[test]
fn ignores_non_underscore_declarations() {
let set = collect_declared_underscore_names("const v = 1; let x;");
assert!(set.is_empty(), "got: {:?}", set);
}
#[test]
fn collects_across_newline_between_keyword_and_name() {
let set = collect_declared_underscore_names("const\n __v = 1;");
assert!(set.contains("__v"), "got: {:?}", set);
}
// --- Rewriting: declarations and uses ---
#[test]
fn rewrites_simple_const_and_uses() {
let out = rewrite("const __v = 1; __v + 2", &["__v"]);
assert!(out.contains("__v$7"), "got: {}", out);
assert!(
!out.split_whitespace()
.any(|t| t == "__v" || t == "__v;" || t == "__v,"),
"unrenamed __v in: {}",
out
);
}
// --- Rewriting: string, comment, regex and template contents are left alone ---
#[test]
fn string_literal_interior_is_not_rewritten() {
let declared = &["__v"];
let declared_set: HashSet<String> =
collect_declared_underscore_names(r#"const s = "const __v = inside";"#);
assert!(
declared_set.is_empty(),
"string-interior `const __v` should not register as a declaration: {:?}",
declared_set
);
let out = rewrite(r#"const __v = 1; const s = "__v inside";"#, declared);
assert!(
out.contains("__v$7 = 1"),
"real declaration should be renamed: {}",
out
);
assert!(
out.contains("\"__v inside\""),
"string literal must be unchanged: {}",
out
);
}
#[test]
fn single_quoted_string_interior_is_not_rewritten() {
let out = rewrite("const __v = 1; const s = '__v inside';", &["__v"]);
assert!(out.contains("'__v inside'"), "got: {}", out);
}
#[test]
fn line_comment_interior_is_not_rewritten() {
let out = rewrite("const __v = 1; // __v used here\n__v + 2", &["__v"]);
assert!(out.contains("// __v used here"), "got: {}", out);
assert!(out.contains("__v$7 + 2"), "got: {}", out);
}
#[test]
fn block_comment_interior_is_not_rewritten() {
let out = rewrite("const __v = 1; /* see __v above */ __v + 2", &["__v"]);
assert!(out.contains("/* see __v above */"), "got: {}", out);
assert!(out.contains("__v$7 + 2"), "got: {}", out);
}
#[test]
fn regex_literal_interior_is_not_rewritten() {
let out = rewrite("const __v = 1; const re = /__v test/g;", &["__v"]);
assert!(
out.contains("/__v test/g"),
"regex should be preserved: {}",
out
);
assert!(out.contains("__v$7 = 1"), "got: {}", out);
}
// A `/` that follows an identifier is division, not the start of a regex.
#[test]
fn division_after_ident_is_not_misread_as_regex() {
let out = rewrite("const __v = 1; const x = a / __v / b;", &["__v"]);
assert!(
out.contains("a / __v$7 / b"),
"expected division, got: {}",
out
);
}
#[test]
fn template_literal_text_is_not_rewritten() {
let out = rewrite("const __v = 1; const s = `literal __v here`;", &["__v"]);
assert!(out.contains("`literal __v here`"), "got: {}", out);
assert!(out.contains("__v$7 = 1"), "got: {}", out);
}
// Identifiers inside a `${...}` slot are code and must be renamed.
#[test]
fn template_expression_slot_is_rewritten() {
let out = rewrite("const __v = 1; const s = `${__v}`;", &["__v"]);
assert!(out.contains("${__v$7}"), "got: {}", out);
}
#[test]
fn nested_template_literals_work() {
let out = rewrite(
"const __v = 1; const s = `outer ${`inner ${__v}`}`;",
&["__v"],
);
assert!(out.contains("${__v$7}"), "got: {}", out);
}
#[test]
fn empty_declared_set_is_identity() {
let source = "const v = 1; const s = \"__v\";";
let out = rewrite(source, &[]);
assert_eq!(out, source);
}
#[test]
fn multiple_declared_names_rewrite_independently() {
let out = rewrite("const __a = 1; const __b = 2; __a + __b", &["__a", "__b"]);
assert!(out.contains("__a$7 = 1"), "got: {}", out);
assert!(out.contains("__b$7 = 2"), "got: {}", out);
assert!(out.contains("__a$7 + __b$7"), "got: {}", out);
}
// --- Collection followed by rewriting ---
#[test]
fn collection_and_rewrite_round_trip_on_macro_shape() {
let body = "{ const __v = []; __v.push(1); __v.push(2); __v }";
let declared = collect_declared_underscore_names(body);
assert!(declared.contains("__v"), "got: {:?}", declared);
let out = rewrite_identifiers(body, &declared, "$3");
assert!(out.contains("const __v$3 = []"), "got: {}", out);
assert!(out.contains("__v$3.push(1)"), "got: {}", out);
assert!(out.contains("__v$3.push(2)"), "got: {}", out);
}
#[test]
fn declaration_inside_string_does_not_register() {
let declared = collect_declared_underscore_names(r#"const s = "const __fake = 1";"#);
assert!(declared.is_empty(), "got: {:?}", declared);
}
#[test]
fn declaration_inside_comment_does_not_register() {
let declared = collect_declared_underscore_names("// const __fake = 1\nconst __real = 2;");
assert!(declared.contains("__real"), "got: {:?}", declared);
assert!(!declared.contains("__fake"), "got: {:?}", declared);
}
// --- Non-ASCII identifiers ---
#[test]
fn collects_latin_accented_identifier() {
let declared = collect_declared_underscore_names("const __café = 1;");
assert!(
declared.contains("__café"),
"expected `__café` to be collected, got: {:?}",
declared
);
assert!(
!declared.contains("__caf"),
"cursor should not have truncated at the accented char: {:?}",
declared
);
}
#[test]
fn collects_cjk_identifier() {
let declared = collect_declared_underscore_names("const __中文 = 1;");
assert!(
declared.contains("__中文"),
"expected `__中文` to be collected, got: {:?}",
declared
);
}
#[test]
fn rewrites_accented_identifier() {
let out = rewrite("const __café = 1; __café + 2", &["__café"]);
assert!(
out.contains("__café$7"),
"expected suffix-renamed Unicode ident, got: {}",
out
);
assert!(
out.contains("__café$7 + 2"),
"both occurrences should be rewritten, got: {}",
out
);
}
#[test]
fn rewrites_cjk_identifier() {
let out = rewrite("const __中文 = 1; __中文.test()", &["__中文"]);
assert!(
out.contains("__中文$7"),
"expected suffix-renamed CJK ident, got: {}",
out
);
assert!(
out.contains("__中文$7.test()"),
"second occurrence should also be rewritten: {}",
out
);
}
#[test]
fn non_ascii_inside_string_literal_stays_literal() {
let out = rewrite(r#"const __v = 1; const s = "café"; __v"#, &["__v"]);
assert!(
out.contains("\"café\""),
"string literal contents should be preserved: {}",
out
);
assert!(
out.contains("__v$7"),
"non-string `__v` use should still be rewritten: {}",
out
);
}
// Only exercises collection; the emoji must not become part of a name.
#[test]
fn emoji_is_not_a_valid_identifier_character() {
let declared = collect_declared_underscore_names("const __👋 = 1;");
assert!(
!declared.iter().any(|n| n.contains('👋')),
"emoji should never appear in a collected identifier: {:?}",
declared
);
}
}