1use super::c_common::{Token, TokenKind, emit_conservative};
28use super::{MinifyError, MinifyOptions, MinifyOutput};
29
30pub fn minify(source: &str, opts: &MinifyOptions) -> Result<MinifyOutput, MinifyError> {
31 let toks = tokenize(source)?;
32 emit_conservative(&toks, opts.keep_comments)
33}
34
35fn tokenize(src: &str) -> Result<Vec<Token<'_>>, MinifyError> {
36 let bytes = src.as_bytes();
37 let mut out: Vec<Token<'_>> = Vec::new();
38 let mut i = 0usize;
39 while i < bytes.len() {
40 let c = bytes[i];
41 if matches!(c, b' ' | b'\t' | b'\r') {
42 i += 1;
43 continue;
44 }
45 if c == b'\n' {
46 out.push(Token::new(TokenKind::Newline));
47 i += 1;
48 continue;
49 }
50 if c == b'/' && peek(bytes, i + 1) == Some(b'/') {
51 let start = i + 2;
52 let mut j = start;
53 while j < bytes.len() && bytes[j] != b'\n' {
54 j += 1;
55 }
56 out.push(Token::new(TokenKind::LineComment(&src[start..j])));
57 i = j;
58 continue;
59 }
60 if c == b'/' && peek(bytes, i + 1) == Some(b'*') {
61 let body_start = i + 2;
62 let mut j = body_start;
63 let mut found = false;
64 while j + 1 < bytes.len() {
65 if bytes[j] == b'*' && bytes[j + 1] == b'/' {
66 found = true;
67 break;
68 }
69 j += 1;
70 }
71 if !found {
72 return Err(MinifyError::new("unterminated /* */ block comment"));
73 }
74 out.push(Token::new(TokenKind::BlockComment(&src[body_start..j])));
75 i = j + 2;
76 continue;
77 }
78 if c == b'/' && regex_is_expected(&out) {
80 let n = scan_regex(src, i)?;
81 out.push(Token::new(TokenKind::Regex(&src[i..i + n])));
82 i += n;
83 continue;
84 }
85 if c == b'"' || c == b'\'' {
86 let n = scan_quoted_string(src, i, c)?;
87 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
88 i += n;
89 continue;
90 }
91 if c == b'`' {
92 let n = scan_template(src, i)?;
93 out.push(Token::new(TokenKind::Template(&src[i..i + n])));
94 i += n;
95 continue;
96 }
97 if is_word_start(src, i) {
98 let n = scan_word(src, i);
99 out.push(Token::new(TokenKind::Word(&src[i..i + n])));
100 i += n;
101 continue;
102 }
103 let n = scan_multi_punct(bytes, i);
104 out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
105 i += n;
106 }
107 Ok(out)
108}
109
110fn regex_is_expected(prev_tokens: &[Token<'_>]) -> bool {
114 for tok in prev_tokens.iter().rev() {
116 match &tok.kind {
117 TokenKind::LineComment(_) | TokenKind::BlockComment(_) | TokenKind::Newline => continue,
118 TokenKind::Word(s) => {
119 return matches!(
120 *s,
121 "return"
122 | "typeof"
123 | "in"
124 | "of"
125 | "delete"
126 | "void"
127 | "new"
128 | "throw"
129 | "await"
130 | "yield"
131 | "instanceof"
132 | "case"
133 | "do"
134 | "else"
135 );
136 }
137 TokenKind::Punct(s) => {
138 return !matches!(*s, ")" | "]" | "}" | "++" | "--");
141 }
142 TokenKind::StrLit(_)
143 | TokenKind::Template(_)
144 | TokenKind::Regex(_)
145 | TokenKind::Preproc(_) => return false,
146 }
147 }
148 true
151}
152
153fn scan_regex(src: &str, i: usize) -> Result<usize, MinifyError> {
154 let bytes = src.as_bytes();
155 debug_assert_eq!(bytes[i], b'/');
156 let mut j = i + 1;
157 let mut in_class = false;
158 while j < bytes.len() {
159 match bytes[j] {
160 b'\\' => {
161 j += 2;
162 continue;
163 }
164 b'[' => {
165 in_class = true;
166 j += 1;
167 }
168 b']' if in_class => {
169 in_class = false;
170 j += 1;
171 }
172 b'/' if !in_class => {
173 j += 1;
175 while j < bytes.len() && bytes[j].is_ascii_alphabetic() {
176 j += 1;
177 }
178 return Ok(j - i);
179 }
180 b'\n' => return Err(MinifyError::new("newline in regex literal")),
181 _ => j += 1,
182 }
183 }
184 Err(MinifyError::new("unterminated regex literal"))
185}
186
187fn scan_quoted_string(src: &str, i: usize, quote: u8) -> Result<usize, MinifyError> {
188 let bytes = src.as_bytes();
189 debug_assert_eq!(bytes[i], quote);
190 let mut j = i + 1;
191 while j < bytes.len() {
192 if bytes[j] == b'\\' {
193 if peek(bytes, j + 1) == Some(b'\n') {
195 j += 2;
196 continue;
197 }
198 j += 2;
199 continue;
200 }
201 if bytes[j] == quote {
202 return Ok(j + 1 - i);
203 }
204 if bytes[j] == b'\n' {
205 return Err(MinifyError::new("newline in string literal"));
206 }
207 j += 1;
208 }
209 Err(MinifyError::new("unterminated string literal"))
210}
211
212fn scan_template(src: &str, i: usize) -> Result<usize, MinifyError> {
213 let bytes = src.as_bytes();
214 debug_assert_eq!(bytes[i], b'`');
215 let mut j = i + 1;
216 while j < bytes.len() {
217 match bytes[j] {
218 b'\\' => {
219 j += 2;
220 }
221 b'`' => return Ok(j + 1 - i),
222 b'$' if peek(bytes, j + 1) == Some(b'{') => {
223 j += 2;
226 let mut depth = 1usize;
227 while j < bytes.len() && depth > 0 {
228 match bytes[j] {
229 b'{' => {
230 depth += 1;
231 j += 1;
232 }
233 b'}' => {
234 depth -= 1;
235 j += 1;
236 }
237 b'`' => {
238 let inner = scan_template(src, j)?;
240 j += inner;
241 }
242 b'"' | b'\'' => {
243 let q = bytes[j];
244 j += scan_quoted_string(src, j, q)?;
245 }
246 b'/' if peek(bytes, j + 1) == Some(b'/') => {
247 while j < bytes.len() && bytes[j] != b'\n' {
248 j += 1;
249 }
250 }
251 b'/' if peek(bytes, j + 1) == Some(b'*') => {
252 j += 2;
253 while j + 1 < bytes.len() && !(bytes[j] == b'*' && bytes[j + 1] == b'/')
254 {
255 j += 1;
256 }
257 if j + 1 >= bytes.len() {
258 return Err(MinifyError::new("unterminated /* */ inside template"));
259 }
260 j += 2;
261 }
262 b'\\' => {
263 j += 2;
264 }
265 _ => j += 1,
266 }
267 }
268 if depth != 0 {
269 return Err(MinifyError::new("unterminated `${…}` in template"));
270 }
271 }
272 _ => j += 1,
273 }
274 }
275 Err(MinifyError::new("unterminated template literal"))
276}
277
/// Reports whether the character at byte offset `i` of `src` can begin a word
/// token: an alphabetic character, `_`, `$`, or an ASCII digit.
///
/// Returns `false` when `i` is at the end of `src`. `i` must lie on a
/// character boundary.
fn is_word_start(src: &str, i: usize) -> bool {
    src[i..].chars().next().map_or(false, |first| {
        matches!(first, '_' | '$') || first.is_ascii_digit() || first.is_alphabetic()
    })
}
282
/// Measures, in bytes, the word that starts at byte offset `i` of `src`.
///
/// A word is a run of alphanumeric characters, `_` and `$`. A `.` is kept as
/// part of the word only when it is not the first character and is directly
/// followed by an ASCII digit, which keeps numbers such as `3.14` together.
///
/// Returns 0 when `i` is at or past the end of `src`, or when the character
/// at `i` is not a word character.
fn scan_word(src: &str, i: usize) -> usize {
    if i >= src.len() {
        return 0;
    }
    let mut consumed = 0usize;
    let mut chars = src[i..].chars().peekable();
    while let Some(ch) = chars.next() {
        let is_word_char = ch.is_alphanumeric() || matches!(ch, '_' | '$');
        let is_decimal_point = ch == '.'
            && consumed > 0
            && chars.peek().map_or(false, |next| next.is_ascii_digit());
        if !(is_word_char || is_decimal_point) {
            break;
        }
        consumed += ch.len_utf8();
    }
    consumed
}
304
/// Measures, in bytes, the punctuation token that starts at offset `i`.
///
/// The longest operator from a fixed table that matches at `i` wins, so
/// `>>>=` is preferred over `>>>`, and `&&=` / `||=` over `&&` / `||`. When no
/// multi-character operator matches, the result is the width of the single
/// character at `i`, derived from its UTF-8 lead byte.
///
/// `bytes` is expected to be the bytes of a valid UTF-8 string with `i` on a
/// character boundary. Returns 1 when `i` is at or past the end of `bytes`.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    // Ordered longest first: the first hit is therefore the longest match.
    const OPERATORS: &[&[u8]] = &[
        b">>>=",
        b"===", b"!==", b"...", b">>>", b"**=", b"<<=", b">>=", b"??=", b"&&=", b"||=",
        b"=>", b"==", b"!=", b"<=", b">=", b"&&", b"||", b"??", b"?.", b"++", b"--",
        b"<<", b">>", b"**", b"+=", b"-=", b"*=", b"/=", b"%=", b"&=", b"|=", b"^=",
    ];

    let rest = bytes.get(i..).unwrap_or(&[]);
    for op in OPERATORS {
        if rest.starts_with(op) {
            return op.len();
        }
    }

    // Single character: the lead byte encodes how many bytes it occupies.
    let width = match rest.first() {
        Some(&lead) if lead >= 0xF0 => 4,
        Some(&lead) if lead >= 0xE0 => 3,
        Some(&lead) if lead >= 0xC0 => 2,
        _ => 1,
    };
    // Never report more bytes than remain, and always make progress.
    width.min(rest.len()).max(1)
}
358
/// Returns the byte at index `i`, or `None` when `i` is out of range.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    if i < bytes.len() {
        Some(bytes[i])
    } else {
        None
    }
}
362
/// Returns the character that starts at byte offset `i` of `src`, or `'\0'`
/// when `i` is exactly at the end of the string.
///
/// # Panics
///
/// Panics if `i` is past the end of `src` or not on a character boundary,
/// because the string is sliced at `i`.
fn char_at(src: &str, i: usize) -> char {
    match src[i..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
366
// End-to-end tests: every case runs the public `minify` entry point and checks
// the emitted text (or that an error is returned).
#[cfg(test)]
mod tests {
    use super::*;

    // Minifies `s` with default options and returns the output body,
    // panicking if minification fails.
    fn min(s: &str) -> String {
        minify(s, &MinifyOptions::default()).unwrap().body
    }

    // Spaces around punctuation and leading indentation are removed while the
    // line breaks of the input are kept.
    #[test]
    fn basic_function() {
        let src = "function add(a, b) {\n return a + b;\n}\n";
        let out = min(src);
        assert_eq!(out, "function add(a,b){\nreturn a+b;\n}\n");
    }

    // A `//` comment is dropped by default; the newline that ended it stays.
    #[test]
    fn strips_line_comment() {
        let src = "// hi\nlet x = 1;\n";
        let out = min(src);
        assert_eq!(out, "\nlet x=1;\n");
    }

    // A `/* */` comment in the middle of a statement disappears entirely.
    #[test]
    fn strips_block_comment_inline() {
        let src = "let x = /* y */ 1;\n";
        let out = min(src);
        assert_eq!(out, "let x=1;\n");
    }

    // Template literals are passed through verbatim, including the spaces
    // inside them.
    #[test]
    fn template_literal() {
        let src = "const s = `hello, ${name}!`;\n";
        let out = min(src);
        assert!(out.contains("`hello, ${name}!`"), "got: {}", out);
    }

    // A template nested inside an interpolation is kept as one unit.
    #[test]
    fn nested_template() {
        let src = "const s = `a${`b${c}d`}e`;\n";
        let out = min(src);
        assert!(out.contains("`a${`b${c}d`}e`"), "got: {}", out);
    }

    // A quoted string inside an interpolation does not end the template.
    #[test]
    fn template_with_string_in_interpolation() {
        let src = "const s = `${\"hi\"}`;\n";
        let out = min(src);
        assert!(out.contains("`${\"hi\"}`"), "got: {}", out);
    }

    // After `=`, a `/` starts a regex literal; pattern and flags survive intact.
    #[test]
    fn regex_literal() {
        let src = "const re = /[a-z]+/gi;\n";
        let out = min(src);
        assert_eq!(out, "const re=/[a-z]+/gi;\n");
    }

    // `return` is one of the keywords after which a regex literal is accepted.
    #[test]
    fn regex_after_return() {
        let src = "function f() { return /\\d+/.test(x); }\n";
        let out = min(src);
        assert!(out.contains("/\\d+/"), "got: {}", out);
    }

    // After an identifier, `/` is division rather than a regex start.
    #[test]
    fn division_after_value() {
        let src = "const x = a / b;\n";
        let out = min(src);
        assert_eq!(out, "const x=a/b;\n");
    }

    // After a closing `)`, `/` is division rather than a regex start.
    #[test]
    fn division_after_paren() {
        let src = "const x = (a + b) / c;\n";
        let out = min(src);
        assert_eq!(out, "const x=(a+b)/c;\n");
    }

    // The line break right after `return` must survive minification.
    #[test]
    fn return_then_object_preserves_newline() {
        let src = "function f() {\n return\n {x: 1};\n}\n";
        let out = min(src);
        assert!(
            out.contains("return\n"),
            "newline preserved after return: {:?}",
            out
        );
    }

    // `=>` is emitted as a single operator with no surrounding spaces.
    #[test]
    fn arrow_function() {
        let src = "const f = (x) => x + 1;\n";
        let out = min(src);
        assert_eq!(out, "const f=(x)=>x+1;\n");
    }

    // `??` is emitted as a single operator with no surrounding spaces.
    #[test]
    fn nullish_coalescing() {
        let src = "const x = a ?? b;\n";
        let out = min(src);
        assert_eq!(out, "const x=a??b;\n");
    }

    // `?.` passes through unchanged between the object and the property.
    #[test]
    fn optional_chaining() {
        let src = "const x = obj?.prop;\n";
        let out = min(src);
        assert_eq!(out, "const x=obj?.prop;\n");
    }

    // `===` is emitted as a single operator with no surrounding spaces.
    #[test]
    fn strict_equality() {
        let src = "if (a === b) {}\n";
        let out = min(src);
        assert_eq!(out, "if(a===b){}\n");
    }

    // TypeScript annotations are compacted; the space between two words
    // (`return String`) is kept.
    #[test]
    fn typescript_type_annotation() {
        let src = "function f(x: number): string { return String(x); }\n";
        let out = min(src);
        assert_eq!(out, "function f(x:number):string{return String(x);}\n");
    }

    // Generic parameter lists are emitted without added or removed characters.
    #[test]
    fn typescript_generic() {
        let src = "function f<T>(x: T): T { return x; }\n";
        let out = min(src);
        assert_eq!(out, "function f<T>(x:T):T{return x;}\n");
    }

    // An escaped quote inside a string does not end the literal.
    #[test]
    fn double_quoted_string_with_escape() {
        let src = "const s = \"a\\\"b\";\n";
        let out = min(src);
        assert_eq!(out, "const s=\"a\\\"b\";\n");
    }

    // `$` is accepted as the first character of an identifier.
    #[test]
    fn dollar_in_identifier() {
        let src = "const $foo = 1;\n";
        let out = min(src);
        assert_eq!(out, "const $foo=1;\n");
    }

    // With `keep_comments` set, the line comment appears in the output in
    // block form (`/* hi*/`) and exactly one warning is reported.
    #[test]
    fn keep_comments_converts_line() {
        let src = "// hi\nlet x = 1;\n";
        let r = minify(
            src,
            &MinifyOptions {
                keep_comments: true,
            },
        )
        .unwrap();
        assert!(r.body.contains("/* hi*/"));
        assert_eq!(r.warnings.len(), 1);
    }

    // A string with no closing quote is an error.
    #[test]
    fn unterminated_string() {
        assert!(minify("const s = \"oops", &MinifyOptions::default()).is_err());
    }

    // A template with no closing backtick is an error.
    #[test]
    fn unterminated_template() {
        assert!(minify("const s = `oops", &MinifyOptions::default()).is_err());
    }

    // A regex with no closing slash is an error.
    #[test]
    fn unterminated_regex() {
        assert!(minify("const r = /oops", &MinifyOptions::default()).is_err());
    }

    // A `/` inside a character class does not terminate the regex.
    #[test]
    fn regex_with_class() {
        let src = "const r = /[/]/g;\n";
        let out = min(src);
        assert!(out.contains("/[/]/g"), "got: {}", out);
    }

    // With no preceding token at all, a leading `/` starts a regex.
    #[test]
    fn regex_at_start_of_file() {
        let src = "/abc/.test(s)\n";
        let out = min(src);
        assert!(out.starts_with("/abc/"), "got: {}", out);
    }
}