1use super::c_common::{Token, TokenKind};
24use super::{MinifyError, MinifyOptions, MinifyOutput, MinifyWarning};
25
26pub fn minify(
27 source: &str,
28 opts: &MinifyOptions,
29 is_cpp: bool,
30) -> Result<MinifyOutput, MinifyError> {
31 let toks = tokenize(source, is_cpp)?;
32 emit(&toks, opts.keep_comments)
33}
34
35fn tokenize(src: &str, is_cpp: bool) -> Result<Vec<Token<'_>>, MinifyError> {
36 let bytes = src.as_bytes();
37 let mut out: Vec<Token<'_>> = Vec::new();
38 let mut i = 0usize;
39 let mut at_line_start = true;
40 while i < bytes.len() {
41 let c = bytes[i];
42 if matches!(c, b' ' | b'\t' | b'\r') {
43 i += 1;
44 continue;
45 }
46 if c == b'\n' {
47 out.push(Token::new(TokenKind::Newline));
48 i += 1;
49 at_line_start = true;
50 continue;
51 }
52 if at_line_start && c == b'#' {
56 let start = i;
57 let mut j = i;
58 while j < bytes.len() {
59 if bytes[j] == b'\\' && peek(bytes, j + 1) == Some(b'\n') {
60 j += 2;
61 continue;
62 }
63 if bytes[j] == b'\\'
64 && peek(bytes, j + 1) == Some(b'\r')
65 && peek(bytes, j + 2) == Some(b'\n')
66 {
67 j += 3;
68 continue;
69 }
70 if bytes[j] == b'\n' {
71 break;
72 }
73 j += 1;
74 }
75 out.push(Token::new(TokenKind::Preproc(&src[start..j])));
76 i = j;
77 at_line_start = false;
80 continue;
81 }
82 at_line_start = false;
83 if c == b'/' && peek(bytes, i + 1) == Some(b'/') {
84 let start = i + 2;
85 let mut j = start;
86 while j < bytes.len() && bytes[j] != b'\n' {
87 j += 1;
88 }
89 out.push(Token::new(TokenKind::LineComment(&src[start..j])));
90 i = j;
91 continue;
92 }
93 if c == b'/' && peek(bytes, i + 1) == Some(b'*') {
94 let body_start = i + 2;
95 let mut j = body_start;
96 let mut found = false;
97 while j + 1 < bytes.len() {
98 if bytes[j] == b'*' && bytes[j + 1] == b'/' {
99 found = true;
100 break;
101 }
102 j += 1;
103 }
104 if !found {
105 return Err(MinifyError::new("unterminated /* */ block comment"));
106 }
107 out.push(Token::new(TokenKind::BlockComment(&src[body_start..j])));
108 i = j + 2;
109 continue;
110 }
111 if let Some(n) = try_scan_string(src, i, is_cpp)? {
115 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
116 i += n;
117 continue;
118 }
119 if c == b'\'' {
120 let n = scan_char_literal(src, i)?;
121 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
122 i += n;
123 continue;
124 }
125 if is_word_start(src, i) {
126 let n = scan_word(src, i);
127 out.push(Token::new(TokenKind::Word(&src[i..i + n])));
128 i += n;
129 continue;
130 }
131 let n = scan_multi_punct(bytes, i);
132 out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
133 i += n;
134 }
135 Ok(out)
136}
137
138fn emit(tokens: &[Token<'_>], keep_comments: bool) -> Result<MinifyOutput, MinifyError> {
139 let mut out = String::new();
140 let mut warnings: Vec<MinifyWarning> = Vec::new();
141 let mut prev_emit_last: Option<char> = None;
142 let mut last_was_preproc = false;
143 for tok in tokens {
144 match &tok.kind {
145 TokenKind::Newline => {
146 if last_was_preproc && !out.ends_with('\n') {
150 out.push('\n');
151 prev_emit_last = None;
152 last_was_preproc = false;
153 }
154 }
155 TokenKind::LineComment(body) => {
156 if !keep_comments {
157 continue;
158 }
159 let block = format!("/*{}*/", body);
160 push_with_space(&mut out, &mut prev_emit_last, &block);
161 warnings.push(MinifyWarning::LineCommentConverted);
162 }
163 TokenKind::BlockComment(body) => {
164 if !keep_comments {
165 continue;
166 }
167 let block = format!("/*{}*/", body);
168 push_with_space(&mut out, &mut prev_emit_last, &block);
169 }
170 TokenKind::Word(s)
171 | TokenKind::Punct(s)
172 | TokenKind::StrLit(s)
173 | TokenKind::Template(s)
174 | TokenKind::Regex(s) => {
175 push_with_space(&mut out, &mut prev_emit_last, s);
176 last_was_preproc = false;
177 }
178 TokenKind::Preproc(s) => {
179 if !out.is_empty() && !out.ends_with('\n') {
180 out.push('\n');
181 }
182 out.push_str(s);
183 prev_emit_last = None;
184 last_was_preproc = true;
185 }
186 }
187 }
188 if last_was_preproc && !out.ends_with('\n') {
189 out.push('\n');
190 }
191 Ok(MinifyOutput {
192 body: out,
193 warnings,
194 })
195}
196
197fn push_with_space(out: &mut String, prev_emit_last: &mut Option<char>, s: &str) {
198 if s.is_empty() {
199 return;
200 }
201 use super::c_common::needs_space;
202 if let Some(prev) = *prev_emit_last {
203 if let Some(next) = s.chars().next() {
204 if needs_space(prev, next) {
205 out.push(' ');
206 }
207 }
208 }
209 out.push_str(s);
210 *prev_emit_last = s.chars().next_back();
211}
212
213fn try_scan_string(src: &str, i: usize, is_cpp: bool) -> Result<Option<usize>, MinifyError> {
214 let bytes = src.as_bytes();
215 let mut p = i;
218 let mut had_prefix = false;
219 if peek(bytes, p) == Some(b'u') && peek(bytes, p + 1) == Some(b'8') {
221 let after = p + 2;
223 if peek(bytes, after) == Some(b'"')
224 || (is_cpp && peek(bytes, after) == Some(b'R') && peek(bytes, after + 1) == Some(b'"'))
225 {
226 p = after;
227 had_prefix = true;
228 }
229 } else if matches!(peek(bytes, p), Some(b'L') | Some(b'u') | Some(b'U')) {
230 let after = p + 1;
231 if peek(bytes, after) == Some(b'"')
232 || (is_cpp && peek(bytes, after) == Some(b'R') && peek(bytes, after + 1) == Some(b'"'))
233 {
234 p = after;
235 had_prefix = true;
236 }
237 }
238 let raw = is_cpp && peek(bytes, p) == Some(b'R') && peek(bytes, p + 1) == Some(b'"');
239 if raw {
240 p += 1; debug_assert_eq!(bytes[p], b'"');
243 let delim_start = p + 1;
244 let mut j = delim_start;
245 while j < bytes.len() && bytes[j] != b'(' {
246 j += 1;
247 }
248 if j >= bytes.len() {
249 return Err(MinifyError::new("malformed raw string"));
250 }
251 let delim = &bytes[delim_start..j];
252 let body_start = j + 1;
253 let mut k = body_start;
255 loop {
256 if k >= bytes.len() {
257 return Err(MinifyError::new("unterminated raw string"));
258 }
259 if bytes[k] == b')' && k + 1 + delim.len() < bytes.len() {
260 if &bytes[k + 1..k + 1 + delim.len()] == delim
261 && bytes.get(k + 1 + delim.len()) == Some(&b'"')
262 {
263 let total = k + 1 + delim.len() + 1 - i;
264 return Ok(Some(total));
265 }
266 }
267 k += 1;
268 }
269 }
270 if peek(bytes, p) == Some(b'"') {
271 let n = scan_dq_string(src, p)?;
272 return Ok(Some(p + n - i));
273 }
274 if had_prefix {
275 return Ok(None);
277 }
278 Ok(None)
279}
280
281fn scan_dq_string(src: &str, i: usize) -> Result<usize, MinifyError> {
282 let bytes = src.as_bytes();
283 debug_assert_eq!(bytes[i], b'"');
284 let mut j = i + 1;
285 while j < bytes.len() {
286 match bytes[j] {
287 b'\\' => j += 2,
288 b'"' => return Ok(j + 1 - i),
289 b'\n' => return Err(MinifyError::new("newline in string literal")),
290 _ => j += 1,
291 }
292 }
293 Err(MinifyError::new("unterminated string literal"))
294}
295
296fn scan_char_literal(src: &str, i: usize) -> Result<usize, MinifyError> {
297 let bytes = src.as_bytes();
298 debug_assert_eq!(bytes[i], b'\'');
299 let mut j = i + 1;
300 while j < bytes.len() {
301 if bytes[j] == b'\\' {
302 j += 2;
303 continue;
304 }
305 if bytes[j] == b'\'' {
306 return Ok(j + 1 - i);
307 }
308 if bytes[j] == b'\n' {
309 return Err(MinifyError::new("newline in char literal"));
310 }
311 j += 1;
312 }
313 Err(MinifyError::new("unterminated char literal"))
314}
315
/// Reports whether the character at byte offset `i` can begin a word token:
/// an alphabetic character, an underscore or an ASCII digit.
///
/// Returns `false` when `i` is at the end of `src`.
fn is_word_start(src: &str, i: usize) -> bool {
    match src[i..].chars().next() {
        Some('_') => true,
        Some(ch) => ch.is_alphabetic() || ch.is_ascii_digit(),
        None => false,
    }
}
320
/// Returns the byte length of the word (identifier or number) that starts
/// at byte offset `i`.
///
/// A word is a run of alphanumeric characters and underscores. Two further
/// characters are accepted inside a word after its first character:
/// * `.` when directly followed by an ASCII digit, so `3.14` is one token;
/// * `'` when the word began with an ASCII digit and an ASCII letter, digit
///   or underscore follows, so C++14/C23 digit separators such as
///   `1'000'000` stay inside the number instead of being mistaken for the
///   start of a character literal.
fn scan_word(src: &str, i: usize) -> usize {
    let bytes = src.as_bytes();
    let starts_with_digit = bytes.get(i).map_or(false, u8::is_ascii_digit);
    let mut j = i;
    while let Some(c) = src.get(j..).and_then(|rest| rest.chars().next()) {
        if c.is_alphanumeric() || c == '_' {
            j += c.len_utf8();
            continue;
        }
        let next = bytes.get(j + 1).copied();
        let joins = match c {
            '.' => j > i && matches!(next, Some(b'0'..=b'9')),
            '\'' => {
                starts_with_digit
                    && j > i
                    && matches!(next, Some(b) if b.is_ascii_alphanumeric() || b == b'_')
            }
            _ => false,
        };
        if !joins {
            break;
        }
        j += 1;
    }
    j - i
}
341
/// Returns the byte length of the punctuator that starts at offset `i`.
///
/// The longest match wins: three-byte operators first, then two-byte
/// operators, otherwise the width of the single character at `i`. That
/// width is derived from the UTF-8 lead byte, so a multi-byte character is
/// never split. A byte that is not a valid lead byte, or an offset at or
/// past the end of `bytes`, yields 1.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    const THREE: &[&[u8]] = &[b"<<=", b">>=", b"...", b"->*"];
    const TWO: &[&[u8]] = &[
        b"->", b"::", b"==", b"!=", b"<=", b">=", b"&&", b"||", b"<<", b">>", b"+=", b"-=",
        b"*=", b"/=", b"%=", b"&=", b"|=", b"^=", b"++", b"--", b".*",
    ];

    let rest = bytes.get(i..).unwrap_or(&[]);
    if THREE.iter().any(|op| rest.starts_with(op)) {
        return 3;
    }
    if TWO.iter().any(|op| rest.starts_with(op)) {
        return 2;
    }
    // Decode the character width from the lead byte instead of reinterpreting
    // the whole slice as `str`: the parameter is an arbitrary byte slice, so
    // nothing guarantees that it is valid UTF-8.
    let width = match rest.first().copied() {
        Some(0xC0..=0xDF) => 2,
        Some(0xE0..=0xEF) => 3,
        Some(0xF0..=0xF7) => 4,
        _ => 1,
    };
    // Never report more bytes than remain, but always make progress.
    width.min(rest.len().max(1))
}
382
/// Returns the byte at index `i`, or `None` when `i` is out of bounds.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    if i < bytes.len() {
        Some(bytes[i])
    } else {
        None
    }
}
386
/// Returns the character that starts at byte offset `i`, or `'\0'` when `i`
/// is the end of `src`.
///
/// # Panics
///
/// Panics if `i` is past the end of `src` or not on a character boundary.
fn char_at(src: &str, i: usize) -> char {
    match src[i..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
390
#[cfg(test)]
mod tests {
    use super::*;

    /// Minifies `src` with default options and returns the output text,
    /// panicking if minification fails.
    fn minified(src: &str, is_cpp: bool) -> String {
        let opts = MinifyOptions::default();
        minify(src, &opts, is_cpp).unwrap().body
    }

    /// Minifies `src` as C.
    fn as_c(src: &str) -> String {
        minified(src, false)
    }

    /// Minifies `src` as C++.
    fn as_cpp(src: &str) -> String {
        minified(src, true)
    }

    /// Reports whether minifying `src` as C with default options fails.
    fn fails_as_c(src: &str) -> bool {
        minify(src, &MinifyOptions::default(), false).is_err()
    }

    #[test]
    fn c_basic() {
        assert_eq!(
            as_c("int main() {\n return 0;\n}\n"),
            "int main(){return 0;}"
        );
    }

    #[test]
    fn c_preprocessor_kept_on_own_line() {
        let out = as_c("#include <stdio.h>\nint main() { return 0; }\n");
        assert!(
            out.starts_with("#include <stdio.h>\n"),
            "preproc on own line: {:?}",
            out
        );
        assert!(out.contains("int main(){return 0;}"));
    }

    #[test]
    fn c_multiple_preprocessor_lines() {
        let out = as_c("#include <stdio.h>\n#include <stdlib.h>\nint x;\n");
        assert_eq!(out, "#include <stdio.h>\n#include <stdlib.h>\nint x;");
    }

    #[test]
    fn c_define_with_continuation() {
        // The directive, continuation included, must survive verbatim.
        let out = as_c("#define FOO(x) \\\n do { x; } while (0)\nint y = 1;\n");
        assert!(out.starts_with("#define FOO(x) \\\n do { x; } while (0)\n"));
        assert!(out.ends_with("int y=1;"));
    }

    #[test]
    fn c_strips_line_comment() {
        assert_eq!(as_c("// hi\nint x;\n"), "int x;");
    }

    #[test]
    fn c_strips_block_comment() {
        assert_eq!(as_c("/* hi */ int x;\n"), "int x;");
    }

    #[test]
    fn cpp_template_double_close() {
        assert_eq!(as_cpp("vector<vector<int>> v;"), "vector<vector<int>>v;");
    }

    #[test]
    fn cpp_template_with_space_at_close() {
        // The space between the two closing angle brackets is kept.
        let out = as_cpp("vector<vector<int> > v;");
        assert!(out.contains("> >"), "got: {}", out);
    }

    #[test]
    fn cpp_raw_string() {
        let out = as_cpp(r#"const char* s = R"x(hi)x";"#);
        assert!(out.contains(r#"R"x(hi)x""#), "got: {}", out);
    }

    #[test]
    fn cpp_wide_string() {
        let out = as_cpp("const wchar_t* s = L\"hi\";");
        assert!(out.contains("L\"hi\""));
    }

    #[test]
    fn cpp_u8_string() {
        let out = as_cpp("const char* s = u8\"hi\";");
        assert!(out.contains("u8\"hi\""));
    }

    #[test]
    fn cpp_arrow_member() {
        assert_eq!(as_cpp("p->x = 1;"), "p->x=1;");
    }

    #[test]
    fn cpp_scope_resolution() {
        assert_eq!(as_cpp("std::string s;"), "std::string s;");
    }

    #[test]
    fn c_keep_comments() {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        let result = minify("// hi\nint x;\n", &opts, false).unwrap();
        assert!(result.body.starts_with("/* hi*/"));
        assert_eq!(result.warnings.len(), 1);
    }

    #[test]
    fn c_unterminated_block_comment() {
        assert!(fails_as_c("/* unterminated"));
    }

    #[test]
    fn c_unterminated_string() {
        assert!(fails_as_c("char* s = \"oops"));
    }
}