1use super::c_common::{Token, TokenKind, emit_aggressive};
22use super::{MinifyError, MinifyOptions, MinifyOutput};
23
24pub fn minify(source: &str, opts: &MinifyOptions) -> Result<MinifyOutput, MinifyError> {
25 let toks = tokenize(source)?;
26 emit_aggressive(&toks, opts.keep_comments)
27}
28
29fn tokenize(src: &str) -> Result<Vec<Token<'_>>, MinifyError> {
30 let bytes = src.as_bytes();
31 let mut out: Vec<Token<'_>> = Vec::new();
32 let mut i = 0usize;
33 while i < bytes.len() {
34 let c = bytes[i];
35 if matches!(c, b' ' | b'\t' | b'\r') {
37 i += 1;
38 continue;
39 }
40 if c == b'\n' {
41 out.push(Token::new(TokenKind::Newline));
42 i += 1;
43 continue;
44 }
45 if c == b'/' && peek(bytes, i + 1) == Some(b'/') {
47 let start = i + 2;
48 let mut j = start;
49 while j < bytes.len() && bytes[j] != b'\n' {
50 j += 1;
51 }
52 out.push(Token::new(TokenKind::LineComment(&src[start..j])));
53 i = j;
54 continue;
55 }
56 if c == b'/' && peek(bytes, i + 1) == Some(b'*') {
57 let body_start = i + 2;
59 let mut j = body_start;
60 let mut depth = 1usize;
61 while j < bytes.len() {
62 if bytes[j] == b'/' && peek(bytes, j + 1) == Some(b'*') {
63 depth += 1;
64 j += 2;
65 continue;
66 }
67 if bytes[j] == b'*' && peek(bytes, j + 1) == Some(b'/') {
68 depth -= 1;
69 if depth == 0 {
70 let body = &src[body_start..j];
71 out.push(Token::new(TokenKind::BlockComment(body)));
72 i = j + 2;
73 break;
74 }
75 j += 2;
76 continue;
77 }
78 j += 1;
79 }
80 if depth != 0 {
81 return Err(MinifyError::new("unterminated /* */ block comment"));
82 }
83 continue;
84 }
85 if c == b'r' || c == b'b' {
87 if let Some((tok, n)) = try_scan_special_string(src, i)? {
88 out.push(Token::new(TokenKind::StrLit(tok)));
89 i += n;
90 continue;
91 }
92 }
93 if c == b'"' {
95 let n = scan_dq_string(src, i)?;
96 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
97 i += n;
98 continue;
99 }
100 if c == b'\'' {
102 let (kind, n) = scan_quote(src, i)?;
103 match kind {
104 QuoteKind::Char => out.push(Token::new(TokenKind::StrLit(&src[i..i + n]))),
105 QuoteKind::Lifetime => out.push(Token::new(TokenKind::Word(&src[i..i + n]))),
106 }
107 i += n;
108 continue;
109 }
110 if is_word_start(src, i) {
114 let n = scan_word(src, i);
115 out.push(Token::new(TokenKind::Word(&src[i..i + n])));
116 i += n;
117 continue;
118 }
119 let n = scan_multi_punct(bytes, i);
125 out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
126 i += n;
127 }
128 Ok(out)
129}
130
/// Returns the byte length of the punctuation token starting at `bytes[i]`.
///
/// Recognised three-byte operators (`..=`, `<<=`, `>>=`) yield 3, recognised
/// two-byte operators (`->`, `=>`, `::`, comparison, logical, shift, `..` and
/// compound assignment) yield 2. Anything else is a single character whose
/// length is its UTF-8 width, so the caller always advances by whole
/// characters.
///
/// The width is derived from the lead byte alone, which keeps this function
/// free of `unsafe`: it never reinterprets the caller's byte slice as `str`.
/// An out-of-range `i`, a continuation byte, or an invalid lead byte yields 1.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    const THREE_BYTE_OPS: [&[u8]; 3] = [b"..=", b"<<=", b">>="];
    const TWO_BYTE_OPS: [&[u8]; 20] = [
        b"->", b"=>", b"::", b"==", b"!=", b"<=", b">=", b"&&", b"||", b"<<", b">>", b"..",
        b"+=", b"-=", b"*=", b"/=", b"%=", b"&=", b"|=", b"^=",
    ];

    let rest = bytes.get(i..).unwrap_or(&[]);

    // Longest match first so `..=` is not split into `..` + `=`.
    if THREE_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 3;
    }
    if TWO_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 2;
    }

    // UTF-8 encodes the sequence length in the high bits of the lead byte.
    match rest.first().copied() {
        Some(0xC0..=0xDF) => 2,
        Some(0xE0..=0xEF) => 3,
        Some(0xF0..=0xF7) => 4,
        _ => 1,
    }
}
171
/// Classification of a token that begins with a single quote, as decided by
/// `scan_quote`.
#[derive(Debug)]
enum QuoteKind {
    /// A quoted literal closed by a second `'`; `tokenize` emits it as `StrLit`.
    Char,
    /// A `'name` with no closing quote; `tokenize` emits it as `Word`.
    Lifetime,
}
177
178fn try_scan_special_string(src: &str, i: usize) -> Result<Option<(&str, usize)>, MinifyError> {
179 let bytes = src.as_bytes();
180 let mut p = i;
181 let mut byte = false;
182 if bytes[p] == b'b' {
183 if peek(bytes, p + 1) == Some(b'\'') {
185 return Ok(None);
187 }
188 byte = true;
189 p += 1;
190 }
191 let mut raw = false;
192 if peek(bytes, p) == Some(b'r') && p > i {
193 raw = true;
195 p += 1;
196 } else if !byte && peek(bytes, p) == Some(b'r') {
197 raw = true;
198 p += 1;
199 }
200 let mut hashes = 0usize;
202 if raw {
203 while peek(bytes, p) == Some(b'#') {
204 hashes += 1;
205 p += 1;
206 }
207 }
208 if peek(bytes, p) != Some(b'"') {
210 return Ok(None);
213 }
214 let body_start = p + 1;
216 if raw {
217 let mut j = body_start;
219 loop {
220 if j >= bytes.len() {
221 return Err(MinifyError::new("unterminated raw string literal"));
222 }
223 if bytes[j] == b'"' {
224 let mut k = j + 1;
226 let mut found = 0;
227 while k < bytes.len() && bytes[k] == b'#' && found < hashes {
228 found += 1;
229 k += 1;
230 }
231 if found == hashes {
232 let total = k - i;
233 return Ok(Some((&src[i..i + total], total)));
234 }
235 }
236 j += 1;
237 }
238 } else {
239 let n = scan_dq_string(src, p)?;
241 let total = (p - i) + n;
242 Ok(Some((&src[i..i + total], total)))
243 }
244}
245
246fn scan_dq_string(src: &str, i: usize) -> Result<usize, MinifyError> {
247 let bytes = src.as_bytes();
248 debug_assert_eq!(bytes[i], b'"');
249 let mut j = i + 1;
250 while j < bytes.len() {
251 match bytes[j] {
252 b'\\' => {
253 j += 2;
254 }
255 b'"' => return Ok(j + 1 - i),
256 _ => {
257 j += 1;
258 }
259 }
260 }
261 Err(MinifyError::new("unterminated string literal"))
262}
263
264fn scan_quote(src: &str, i: usize) -> Result<(QuoteKind, usize), MinifyError> {
265 let bytes = src.as_bytes();
266 debug_assert_eq!(bytes[i], b'\'');
267 let after = i + 1;
271 if after >= bytes.len() {
272 return Err(MinifyError::new("unterminated `'`"));
273 }
274 if bytes[after] == b'\\' {
276 let mut j = after + 1;
277 if j >= bytes.len() {
279 return Err(MinifyError::new("unterminated char escape"));
280 }
281 let esc = bytes[j];
282 j += 1;
283 if esc == b'x' {
284 j = j.saturating_add(2).min(bytes.len()); } else if esc == b'u' && peek(bytes, j) == Some(b'{') {
286 j += 1;
288 while j < bytes.len() && bytes[j] != b'}' {
289 j += 1;
290 }
291 if j < bytes.len() {
292 j += 1;
293 }
294 }
295 if peek(bytes, j) != Some(b'\'') {
296 return Err(MinifyError::new("malformed char literal"));
297 }
298 return Ok((QuoteKind::Char, j + 1 - i));
299 }
300 let id_start = after;
302 let mut j = id_start;
303 while j < bytes.len() && is_id_continue(char_at(src, j)) {
304 j += char_at(src, j).len_utf8();
305 }
306 if j < bytes.len() && bytes[j] == b'\'' {
309 return Ok((QuoteKind::Char, j + 1 - i));
313 }
314 if j == id_start {
316 let cl = char_at(src, j).len_utf8();
319 if peek(bytes, j + cl) == Some(b'\'') {
320 return Ok((QuoteKind::Char, j + cl + 1 - i));
321 }
322 return Err(MinifyError::new("malformed `'` token"));
323 }
324 Ok((QuoteKind::Lifetime, j - i))
325}
326
327fn is_word_start(src: &str, i: usize) -> bool {
328 let c = char_at(src, i);
329 c.is_alphabetic() || c == '_' || c.is_ascii_digit()
330}
331
/// Reports whether `c` may appear inside an identifier: an underscore or any
/// alphanumeric character.
fn is_id_continue(c: char) -> bool {
    matches!(c, '_') || c.is_alphanumeric()
}
335
336fn scan_word(src: &str, i: usize) -> usize {
337 let mut j = i;
338 let bytes = src.as_bytes();
339 let len = bytes.len();
340 while j < len {
341 let c = char_at(src, j);
342 if c.is_alphanumeric() || c == '_' {
343 j += c.len_utf8();
344 continue;
345 }
346 if c == '.' && j > i {
355 let next = peek(bytes, j + 1);
358 if matches!(next, Some(b'0'..=b'9')) {
359 j += 1;
360 continue;
361 }
362 }
363 break;
364 }
365 j - i
366}
367
/// Returns the byte at index `i`, or `None` when `i` is past the end.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    if i < bytes.len() {
        Some(bytes[i])
    } else {
        None
    }
}
371
/// Decodes the character that starts at byte offset `i` of `src`, or `'\0'`
/// when `i` is exactly the end of the string.
///
/// # Panics
///
/// Panics if `i` is past the end of `src` or not on a char boundary.
fn char_at(src: &str, i: usize) -> char {
    match src[i..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Minifies `s` with default options and returns the output body.
    fn min(s: &str) -> String {
        let opts = MinifyOptions::default();
        minify(s, &opts).unwrap().body
    }

    /// Minifies `s` with `keep_comments` enabled and returns the output body.
    fn min_keep(s: &str) -> String {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        minify(s, &opts).unwrap().body
    }

    // --- whitespace and comments ---

    #[test]
    fn basic_function() {
        assert_eq!(
            min("fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n"),
            "fn add(a:i32,b:i32)->i32{a+b}"
        );
    }

    #[test]
    fn strips_line_comments() {
        assert_eq!(min("fn x() {\n // hi\n 1\n}\n"), "fn x(){1}");
    }

    #[test]
    fn strips_doc_comments() {
        assert_eq!(min("/// docs go here\nfn x() {}\n"), "fn x(){}");
    }

    #[test]
    fn nested_block_comment_stripped() {
        assert_eq!(
            min("fn x() { /* outer /* inner */ outer */ 1 }"),
            "fn x(){1}"
        );
    }

    #[test]
    fn keep_comments_converts_line_to_block() {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        let result = minify("fn x() {\n // hello\n 1\n}\n", &opts).unwrap();
        assert!(result.body.contains("/* hello*/"));
        assert_eq!(result.warnings.len(), 1);
    }

    #[test]
    fn keep_comments_preserves_block_comment() {
        assert!(min_keep("fn x() { /* hello */ 1 }").contains("/* hello */"));
    }

    // --- string-like literals ---

    #[test]
    fn raw_string_simple() {
        assert_eq!(min(r#"let s = r"hello";"#), r#"let s=r"hello";"#);
    }

    #[test]
    fn raw_string_with_hashes() {
        let minified = min("let s = r##\"con\"tains\"##;");
        assert!(
            minified.contains("r##\"con\"tains\"##"),
            "got: {}",
            minified
        );
    }

    #[test]
    fn byte_string() {
        assert_eq!(min(r#"let s = b"\xff\x00";"#), r#"let s=b"\xff\x00";"#);
    }

    #[test]
    fn raw_byte_string() {
        assert!(min(r#"let s = br"raw bytes";"#).contains(r#"br"raw bytes""#));
    }

    // --- lifetimes and char literals ---

    #[test]
    fn lifetime_preserved() {
        assert_eq!(
            min("fn foo<'a>(x: &'a str) -> &'a str { x }"),
            "fn foo<'a>(x:&'a str)->&'a str{x}"
        );
    }

    #[test]
    fn static_lifetime() {
        assert_eq!(
            min("let s: &'static str = \"hi\";"),
            "let s:&'static str=\"hi\";"
        );
    }

    #[test]
    fn char_literal() {
        let minified = min("let c = 'a'; let d = '\\n'; let e = '\\u{1F600}';");
        for expected in ["'a'", "'\\n'", "'\\u{1F600}'"].iter() {
            assert!(minified.contains(expected));
        }
    }

    #[test]
    fn byte_char() {
        assert_eq!(min("let c = b'a';"), "let c=b'a';");
    }

    // --- numbers ---

    #[test]
    fn underscored_number() {
        assert_eq!(min("let n = 1_000_000;"), "let n=1_000_000;");
    }

    #[test]
    fn hex_number_with_suffix() {
        assert_eq!(min("let n = 0xFF_u32;"), "let n=0xFF_u32;");
    }

    #[test]
    fn float_literal() {
        assert_eq!(min("let f = 1.5e10;"), "let f=1.5e10;");
    }

    // --- punctuation and identifiers ---

    #[test]
    fn double_colon_preserved() {
        assert_eq!(
            min("use std::collections::HashMap;"),
            "use std::collections::HashMap;"
        );
    }

    #[test]
    fn arrow_preserved() {
        assert_eq!(min("fn x() -> i32 { 0 }"), "fn x()->i32{0}");
    }

    #[test]
    fn fat_arrow_preserved() {
        assert_eq!(
            min("match x { 1 => true, _ => false }"),
            "match x{1=>true,_=>false}"
        );
    }

    #[test]
    fn unicode_identifier() {
        assert_eq!(min("let π = 3.14;"), "let π=3.14;");
    }

    #[test]
    fn range_operator() {
        assert_eq!(min("let r = 1..5;"), "let r=1..5;");
    }

    // --- malformed input ---

    #[test]
    fn unterminated_string_errors() {
        let outcome = minify("let s = \"unterminated", &MinifyOptions::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn unterminated_block_comment_errors() {
        let outcome = minify("fn x() { /* no end", &MinifyOptions::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn nested_block_comment_unbalanced_errors() {
        let outcome = minify("fn x() { /* /* */ }", &MinifyOptions::default());
        assert!(outcome.is_err());
    }
}