use super::c_common::{Token, TokenKind, emit_aggressive};
use super::{MinifyError, MinifyOptions, MinifyOutput};
/// Minifies Rust `source`.
///
/// The source is tokenized first; the resulting tokens are then handed to
/// `emit_aggressive` together with `opts.keep_comments`.
///
/// # Errors
///
/// Returns the tokenizer's error (for example an unterminated string or block
/// comment), or whatever error `emit_aggressive` reports.
pub fn minify(source: &str, opts: &MinifyOptions) -> Result<MinifyOutput, MinifyError> {
    tokenize(source).and_then(|tokens| emit_aggressive(&tokens, opts.keep_comments))
}
/// Splits `src` into the token stream consumed by `emit_aggressive`.
///
/// Recognised, in priority order at each position:
/// - spaces, tabs and carriage returns (skipped, no token);
/// - `\n` (a `Newline` token);
/// - `//` line comments and nestable `/* */` block comments (the token carries
///   only the comment body, without the delimiters);
/// - `r"…"`, `r#"…"#`, `b"…"`, `br"…"` string forms (`StrLit`);
/// - plain `"…"` strings (`StrLit`);
/// - `'…` tokens, classified by `scan_quote` as a quoted literal (`StrLit`) or
///   a lifetime/label (`Word`);
/// - words (identifiers, keywords and numeric literals);
/// - everything else as punctuation, grouped by `scan_multi_punct`.
///
/// # Errors
///
/// Returns an error for an unterminated block comment and propagates the
/// errors of the string / quote scanners.
fn tokenize(src: &str) -> Result<Vec<Token<'_>>, MinifyError> {
    let bytes = src.as_bytes();
    let mut out: Vec<Token<'_>> = Vec::new();
    let mut i = 0usize;
    while i < bytes.len() {
        let c = bytes[i];
        let next = peek(bytes, i + 1);

        // Horizontal whitespace and CR never produce a token.
        if matches!(c, b' ' | b'\t' | b'\r') {
            i += 1;
            continue;
        }
        if c == b'\n' {
            out.push(Token::new(TokenKind::Newline));
            i += 1;
            continue;
        }

        // `// …` runs up to (not including) the next newline or end of input.
        if c == b'/' && next == Some(b'/') {
            let start = i + 2;
            let end = bytes[start..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(bytes.len(), |off| start + off);
            // On CRLF input the byte before `\n` is `\r`; it is line-ending
            // whitespace, not comment text, so keep it out of the body.
            let body_end = if end > start && bytes[end - 1] == b'\r' {
                end - 1
            } else {
                end
            };
            out.push(Token::new(TokenKind::LineComment(&src[start..body_end])));
            i = end;
            continue;
        }

        // `/* … */` with nesting: track depth until the opening delimiter is matched.
        if c == b'/' && next == Some(b'*') {
            let body_start = i + 2;
            let mut j = body_start;
            let mut depth = 1usize;
            let mut close = None;
            while j < bytes.len() {
                match (bytes[j], peek(bytes, j + 1)) {
                    (b'/', Some(b'*')) => {
                        depth += 1;
                        j += 2;
                    }
                    (b'*', Some(b'/')) => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(j);
                            break;
                        }
                        j += 2;
                    }
                    _ => j += 1,
                }
            }
            let close = match close {
                Some(at) => at,
                None => return Err(MinifyError::new("unterminated /* */ block comment")),
            };
            out.push(Token::new(TokenKind::BlockComment(&src[body_start..close])));
            i = close + 2;
            continue;
        }

        // `r` / `b` may introduce a raw or byte string; otherwise they are
        // ordinary word starts and fall through to the word scanner below.
        if c == b'r' || c == b'b' {
            if let Some((tok, n)) = try_scan_special_string(src, i)? {
                out.push(Token::new(TokenKind::StrLit(tok)));
                i += n;
                continue;
            }
        }

        if c == b'"' {
            let n = scan_dq_string(src, i)?;
            out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
            i += n;
            continue;
        }

        if c == b'\'' {
            let (kind, n) = scan_quote(src, i)?;
            let text = &src[i..i + n];
            let token = match kind {
                QuoteKind::Char => TokenKind::StrLit(text),
                QuoteKind::Lifetime => TokenKind::Word(text),
            };
            out.push(Token::new(token));
            i += n;
            continue;
        }

        if is_word_start(src, i) {
            let n = scan_word(src, i);
            out.push(Token::new(TokenKind::Word(&src[i..i + n])));
            i += n;
            continue;
        }

        let n = scan_multi_punct(bytes, i);
        out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
        i += n;
    }
    Ok(out)
}
/// Returns the byte length of the punctuation token starting at `bytes[i]`.
///
/// The three-byte operators (`..=`, `<<=`, `>>=`) are tried first, then the
/// two-byte operators. Anything else is a single character, whose UTF-8 width
/// is derived from its lead byte, so no `unsafe` re-interpretation of `bytes`
/// as `str` is needed.
///
/// `bytes` is expected to be UTF-8 text with `i` on a character boundary.
/// If `i` is out of range or sits on a continuation byte the function returns
/// `1` instead of panicking.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    const THREE_BYTE_OPS: &[&[u8]] = &[b"..=", b"<<=", b">>="];
    const TWO_BYTE_OPS: &[&[u8]] = &[
        b"->", b"=>", b"::", b"==", b"!=", b"<=", b">=", b"&&", b"||", b"<<", b">>", b"..",
        b"+=", b"-=", b"*=", b"/=", b"%=", b"&=", b"|=", b"^=",
    ];

    let rest = bytes.get(i..).unwrap_or(&[]);

    // Longest match wins, so `..=` is not split into `..` and `=`.
    if THREE_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 3;
    }
    if TWO_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 2;
    }

    // Width of one UTF-8 scalar, read off the lead byte.
    let width = match rest.first() {
        Some(&lead) if lead >= 0xF0 => 4,
        Some(&lead) if lead >= 0xE0 => 3,
        Some(&lead) if lead >= 0xC0 => 2,
        _ => 1,
    };
    // Never report more bytes than remain (only relevant for truncated input).
    width.min(rest.len().max(1))
}
/// How `scan_quote` classified a token that begins with `'`.
#[derive(Debug)]
enum QuoteKind {
/// A quoted literal with a closing `'`; `tokenize` emits it as `TokenKind::StrLit`.
Char,
/// A `'name` without a closing quote; `tokenize` emits it as `TokenKind::Word`.
Lifetime,
}
/// Tries to scan a prefixed string literal (`b"…"`, `r"…"`, `r#"…"#`, `br"…"`,
/// `br#"…"#`) that starts at byte `i`, where `src[i]` is `r` or `b`.
///
/// Returns `Ok(Some((text, len)))` with the full literal (prefix included) and
/// its byte length, or `Ok(None)` when the bytes at `i` do not begin such a
/// literal (for instance `b'x'` or an ordinary identifier).
///
/// # Errors
///
/// Fails when a raw string is never closed, and propagates the error of
/// `scan_dq_string` for a non-raw byte string.
fn try_scan_special_string(src: &str, i: usize) -> Result<Option<(&str, usize)>, MinifyError> {
    let bytes = src.as_bytes();
    let mut cursor = i;

    // Optional `b` prefix; `b'` is a byte char, which is not handled here.
    if bytes[cursor] == b'b' {
        if peek(bytes, cursor + 1) == Some(b'\'') {
            return Ok(None);
        }
        cursor += 1;
    }

    // Optional `r` prefix followed by any number of `#`.
    let is_raw = peek(bytes, cursor) == Some(b'r');
    let mut hashes = 0usize;
    if is_raw {
        cursor += 1;
        while peek(bytes, cursor) == Some(b'#') {
            hashes += 1;
            cursor += 1;
        }
    }

    // Without an opening quote this is just a word (or a raw identifier).
    if peek(bytes, cursor) != Some(b'"') {
        return Ok(None);
    }

    if !is_raw {
        let quoted = scan_dq_string(src, cursor)?;
        let total = (cursor - i) + quoted;
        return Ok(Some((&src[i..i + total], total)));
    }

    // Raw string: no escapes; it ends at the first `"` followed by `hashes` `#`s.
    let mut quote = cursor + 1;
    while quote < bytes.len() {
        if bytes[quote] == b'"' {
            let run = bytes[quote + 1..]
                .iter()
                .take(hashes)
                .take_while(|&&b| b == b'#')
                .count();
            if run == hashes {
                let total = quote + 1 + run - i;
                return Ok(Some((&src[i..i + total], total)));
            }
        }
        quote += 1;
    }
    Err(MinifyError::new("unterminated raw string literal"))
}
/// Measures the double-quoted string literal whose opening `"` is at byte `i`.
///
/// A backslash causes the byte after it to be skipped, so `\"` does not end
/// the literal. Returns the literal's length in bytes, both quotes included.
///
/// # Errors
///
/// Fails when the input ends before a closing `"` is found.
fn scan_dq_string(src: &str, i: usize) -> Result<usize, MinifyError> {
    let bytes = src.as_bytes();
    debug_assert_eq!(bytes[i], b'"');

    let body = bytes.get(i + 1..).unwrap_or(&[]);
    let mut skip_next = false;
    for (offset, &b) in body.iter().enumerate() {
        if skip_next {
            // This byte is the target of a preceding backslash.
            skip_next = false;
            continue;
        }
        match b {
            b'\\' => skip_next = true,
            // `offset` is relative to the byte after the opening quote, so the
            // total length is opening quote + offset + closing quote.
            b'"' => return Ok(offset + 2),
            _ => {}
        }
    }
    Err(MinifyError::new("unterminated string literal"))
}
/// Scans the token that starts with the `'` at byte `i` and decides whether it
/// is a quoted literal (`QuoteKind::Char`) or a lifetime/label
/// (`QuoteKind::Lifetime`). Returns the kind and the token's byte length.
///
/// - `'\…'`: an escape; `\x` skips two more bytes, `\u{…}` skips through the
///   closing brace, any other escape is a single byte. A closing `'` must follow.
/// - `'ident'`: identifier characters followed by `'` are a literal.
/// - `'ident`: identifier characters without a closing `'` are a lifetime.
/// - `'?'`: a single non-identifier character followed by `'` is a literal.
///
/// # Errors
///
/// Fails when the input ends right after `'` or inside an escape, when an
/// escape is not followed by `'`, or when `'` is followed by a
/// non-identifier character that is not closed by `'`.
fn scan_quote(src: &str, i: usize) -> Result<(QuoteKind, usize), MinifyError> {
    let bytes = src.as_bytes();
    debug_assert_eq!(bytes[i], b'\'');

    let first = i + 1;
    if first >= bytes.len() {
        return Err(MinifyError::new("unterminated `'`"));
    }

    if bytes[first] == b'\\' {
        let esc_at = first + 1;
        let esc = match peek(bytes, esc_at) {
            Some(b) => b,
            None => return Err(MinifyError::new("unterminated char escape")),
        };
        // `end` is moved to where the closing quote is expected.
        let mut end = esc_at + 1;
        match esc {
            b'x' => end = end.saturating_add(2).min(bytes.len()),
            b'u' if peek(bytes, end) == Some(b'{') => {
                end += 1;
                while end < bytes.len() && bytes[end] != b'}' {
                    end += 1;
                }
                if end < bytes.len() {
                    end += 1;
                }
            }
            _ => {}
        }
        return if peek(bytes, end) == Some(b'\'') {
            Ok((QuoteKind::Char, end + 1 - i))
        } else {
            Err(MinifyError::new("malformed char literal"))
        };
    }

    // Consume a run of identifier characters after the quote.
    let mut end = first;
    while end < bytes.len() {
        let ch = char_at(src, end);
        if !is_id_continue(ch) {
            break;
        }
        end += ch.len_utf8();
    }

    if peek(bytes, end) == Some(b'\'') {
        return Ok((QuoteKind::Char, end + 1 - i));
    }
    if end > first {
        return Ok((QuoteKind::Lifetime, end - i));
    }

    // No identifier characters: accept exactly one other character if it is closed.
    let width = char_at(src, end).len_utf8();
    if peek(bytes, end + width) == Some(b'\'') {
        Ok((QuoteKind::Char, end + width + 1 - i))
    } else {
        Err(MinifyError::new("malformed `'` token"))
    }
}
/// Reports whether the character at byte `i` can begin a word token:
/// an alphabetic character, `_`, or an ASCII digit (numeric literals are
/// scanned as words too).
fn is_word_start(src: &str, i: usize) -> bool {
    let first = char_at(src, i);
    matches!(first, '_' | '0'..='9') || first.is_alphabetic()
}
/// Reports whether `c` may appear inside an identifier: `_` or any
/// alphanumeric character.
fn is_id_continue(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
fn scan_word(src: &str, i: usize) -> usize {
let mut j = i;
let bytes = src.as_bytes();
let len = bytes.len();
while j < len {
let c = char_at(src, j);
if c.is_alphanumeric() || c == '_' {
j += c.len_utf8();
continue;
}
if c == '.' && j > i {
let next = peek(bytes, j + 1);
if matches!(next, Some(b'0'..=b'9')) {
j += 1;
continue;
}
}
break;
}
j - i
}
/// Returns the byte at index `i`, or `None` when `i` is past the end.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    match bytes.get(i) {
        Some(&b) => Some(b),
        None => None,
    }
}
/// Decodes the character that starts at byte offset `i` of `src`.
///
/// Returns `'\0'` when `i` is exactly the end of the string. Like any `str`
/// slice, this panics if `i` is past the end or not on a character boundary.
fn char_at(src: &str, i: usize) -> char {
    match src[i..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minifies with default options and returns the output body.
    fn minified(s: &str) -> String {
        let opts = MinifyOptions::default();
        minify(s, &opts).unwrap().body
    }

    /// Minifies with `keep_comments` enabled and returns the output body.
    fn minified_keeping_comments(s: &str) -> String {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        minify(s, &opts).unwrap().body
    }

    // --- whitespace and comments -------------------------------------------

    #[test]
    fn basic_function() {
        assert_eq!(
            minified("fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n"),
            "fn add(a:i32,b:i32)->i32{a+b}"
        );
    }

    #[test]
    fn strips_line_comments() {
        assert_eq!(minified("fn x() {\n // hi\n 1\n}\n"), "fn x(){1}");
    }

    #[test]
    fn strips_doc_comments() {
        assert_eq!(minified("/// docs go here\nfn x() {}\n"), "fn x(){}");
    }

    #[test]
    fn nested_block_comment_stripped() {
        assert_eq!(
            minified("fn x() { /* outer /* inner */ outer */ 1 }"),
            "fn x(){1}"
        );
    }

    #[test]
    fn keep_comments_converts_line_to_block() {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        let result = minify("fn x() {\n // hello\n 1\n}\n", &opts).unwrap();
        assert!(result.body.contains("/* hello*/"));
        assert_eq!(result.warnings.len(), 1);
    }

    #[test]
    fn keep_comments_preserves_block_comment() {
        assert!(minified_keeping_comments("fn x() { /* hello */ 1 }").contains("/* hello */"));
    }

    // --- string, char and lifetime tokens ----------------------------------

    #[test]
    fn raw_string_simple() {
        assert_eq!(minified(r#"let s = r"hello";"#), r#"let s=r"hello";"#);
    }

    #[test]
    fn raw_string_with_hashes() {
        let out = minified("let s = r##\"con\"tains\"##;");
        assert!(out.contains("r##\"con\"tains\"##"), "got: {}", out);
    }

    #[test]
    fn byte_string() {
        assert_eq!(
            minified(r#"let s = b"\xff\x00";"#),
            r#"let s=b"\xff\x00";"#
        );
    }

    #[test]
    fn raw_byte_string() {
        assert!(minified(r#"let s = br"raw bytes";"#).contains(r#"br"raw bytes""#));
    }

    #[test]
    fn lifetime_preserved() {
        assert_eq!(
            minified("fn foo<'a>(x: &'a str) -> &'a str { x }"),
            "fn foo<'a>(x:&'a str)->&'a str{x}"
        );
    }

    #[test]
    fn static_lifetime() {
        assert_eq!(
            minified("let s: &'static str = \"hi\";"),
            "let s:&'static str=\"hi\";"
        );
    }

    #[test]
    fn char_literal() {
        let out = minified("let c = 'a'; let d = '\\n'; let e = '\\u{1F600}';");
        for expected in ["'a'", "'\\n'", "'\\u{1F600}'"].iter() {
            assert!(out.contains(expected));
        }
    }

    #[test]
    fn byte_char() {
        assert_eq!(minified("let c = b'a';"), "let c=b'a';");
    }

    // --- numbers and identifiers -------------------------------------------

    #[test]
    fn underscored_number() {
        assert_eq!(minified("let n = 1_000_000;"), "let n=1_000_000;");
    }

    #[test]
    fn hex_number_with_suffix() {
        assert_eq!(minified("let n = 0xFF_u32;"), "let n=0xFF_u32;");
    }

    #[test]
    fn float_literal() {
        assert_eq!(minified("let f = 1.5e10;"), "let f=1.5e10;");
    }

    #[test]
    fn unicode_identifier() {
        assert_eq!(minified("let π = 3.14;"), "let π=3.14;");
    }

    // --- punctuation ---------------------------------------------------------

    #[test]
    fn double_colon_preserved() {
        assert_eq!(
            minified("use std::collections::HashMap;"),
            "use std::collections::HashMap;"
        );
    }

    #[test]
    fn arrow_preserved() {
        assert_eq!(minified("fn x() -> i32 { 0 }"), "fn x()->i32{0}");
    }

    #[test]
    fn fat_arrow_preserved() {
        assert_eq!(
            minified("match x { 1 => true, _ => false }"),
            "match x{1=>true,_=>false}"
        );
    }

    #[test]
    fn range_operator() {
        assert_eq!(minified("let r = 1..5;"), "let r=1..5;");
    }

    // --- error paths ---------------------------------------------------------

    #[test]
    fn unterminated_string_errors() {
        let result = minify("let s = \"unterminated", &MinifyOptions::default());
        assert!(result.is_err());
    }

    #[test]
    fn unterminated_block_comment_errors() {
        let result = minify("fn x() { /* no end", &MinifyOptions::default());
        assert!(result.is_err());
    }

    #[test]
    fn nested_block_comment_unbalanced_errors() {
        let result = minify("fn x() { /* /* */ }", &MinifyOptions::default());
        assert!(result.is_err());
    }
}