1use super::c_common::{Token, TokenKind, emit_aggressive};
17use super::{MinifyError, MinifyOptions, MinifyOutput};
18
19pub fn minify(source: &str, opts: &MinifyOptions) -> Result<MinifyOutput, MinifyError> {
20 let toks = tokenize(source)?;
21 emit_aggressive(&toks, opts.keep_comments)
22}
23
24fn tokenize(src: &str) -> Result<Vec<Token<'_>>, MinifyError> {
25 let bytes = src.as_bytes();
26 let mut out: Vec<Token<'_>> = Vec::new();
27 let mut i = 0usize;
28 while i < bytes.len() {
29 let c = bytes[i];
30 if matches!(c, b' ' | b'\t' | b'\r') {
31 i += 1;
32 continue;
33 }
34 if c == b'\n' {
35 out.push(Token::new(TokenKind::Newline));
36 i += 1;
37 continue;
38 }
39 if c == b'/' && peek(bytes, i + 1) == Some(b'/') {
40 let start = i + 2;
41 let mut j = start;
42 while j < bytes.len() && bytes[j] != b'\n' {
43 j += 1;
44 }
45 out.push(Token::new(TokenKind::LineComment(&src[start..j])));
46 i = j;
47 continue;
48 }
49 if c == b'/' && peek(bytes, i + 1) == Some(b'*') {
50 let body_start = i + 2;
51 let mut j = body_start;
52 while j + 1 < bytes.len() {
53 if bytes[j] == b'*' && bytes[j + 1] == b'/' {
54 let body = &src[body_start..j];
55 out.push(Token::new(TokenKind::BlockComment(body)));
56 i = j + 2;
57 break;
58 }
59 j += 1;
60 }
61 if i <= body_start {
62 return Err(MinifyError::new("unterminated /* */ block comment"));
63 }
64 continue;
65 }
66 if c == b'"' && peek(bytes, i + 1) == Some(b'"') && peek(bytes, i + 2) == Some(b'"') {
68 let start = i;
69 let mut j = i + 3;
70 loop {
71 if j + 2 >= bytes.len() {
72 return Err(MinifyError::new("unterminated text block"));
73 }
74 if bytes[j] == b'"' && bytes[j + 1] == b'"' && bytes[j + 2] == b'"' {
75 let escaped = j > start + 3 && bytes[j - 1] == b'\\';
78 if !escaped {
79 j += 3;
80 break;
81 }
82 }
83 j += 1;
84 }
85 out.push(Token::new(TokenKind::StrLit(&src[start..j])));
86 i = j;
87 continue;
88 }
89 if c == b'"' {
90 let n = scan_dq_string(src, i)?;
91 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
92 i += n;
93 continue;
94 }
95 if c == b'\'' {
96 let n = scan_char_literal(src, i)?;
97 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
98 i += n;
99 continue;
100 }
101 if c == b'@' && peek(bytes, i + 1).map_or(false, is_ident_start_byte) {
104 let mut j = i + 1;
105 while j < bytes.len() && is_ident_continue_byte(bytes[j]) {
106 j += 1;
107 }
108 out.push(Token::new(TokenKind::Word(&src[i..j])));
109 i = j;
110 continue;
111 }
112 if is_word_start(src, i) {
113 let n = scan_word(src, i);
114 out.push(Token::new(TokenKind::Word(&src[i..i + n])));
115 i += n;
116 continue;
117 }
118 let n = scan_multi_punct(bytes, i);
119 out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
120 i += n;
121 }
122 Ok(out)
123}
124
125fn scan_dq_string(src: &str, i: usize) -> Result<usize, MinifyError> {
126 let bytes = src.as_bytes();
127 debug_assert_eq!(bytes[i], b'"');
128 let mut j = i + 1;
129 while j < bytes.len() {
130 match bytes[j] {
131 b'\\' => {
132 j += 2;
133 }
134 b'"' => return Ok(j + 1 - i),
135 b'\n' => return Err(MinifyError::new("newline in string literal")),
136 _ => j += 1,
137 }
138 }
139 Err(MinifyError::new("unterminated string literal"))
140}
141
142fn scan_char_literal(src: &str, i: usize) -> Result<usize, MinifyError> {
143 let bytes = src.as_bytes();
144 debug_assert_eq!(bytes[i], b'\'');
145 let mut j = i + 1;
146 if j >= bytes.len() {
147 return Err(MinifyError::new("unterminated char literal"));
148 }
149 if bytes[j] == b'\\' {
150 j += 2;
151 while j < bytes.len() && bytes[j] != b'\'' && bytes[j] != b'\n' {
153 j += 1;
154 }
155 } else {
156 j += char_at(src, j).len_utf8();
158 }
159 if peek(bytes, j) != Some(b'\'') {
160 return Err(MinifyError::new("malformed char literal"));
161 }
162 Ok(j + 1 - i)
163}
164
/// True when `b` is an ASCII letter, `_`, or `$`.
fn is_ident_start_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
/// True when `b` is an ASCII letter, an ASCII digit, `_`, or `$`.
fn is_ident_continue_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'$')
}
171
172fn is_word_start(src: &str, i: usize) -> bool {
173 let c = char_at(src, i);
174 c.is_alphabetic() || c == '_' || c == '$' || c.is_ascii_digit()
175}
176
177fn scan_word(src: &str, i: usize) -> usize {
178 let bytes = src.as_bytes();
179 let mut j = i;
180 while j < bytes.len() {
181 let c = char_at(src, j);
182 if c.is_alphanumeric() || c == '_' || c == '$' {
183 j += c.len_utf8();
184 continue;
185 }
186 if c == '.' {
187 let next = peek(bytes, j + 1);
189 if matches!(next, Some(b'0'..=b'9')) && j > i {
190 j += 1;
191 continue;
192 }
193 }
194 break;
195 }
196 j - i
197}
198
/// Returns the byte length of the punctuation token that starts at `bytes[i]`.
///
/// The longest listed operator wins: three-byte operators are tried first,
/// then two-byte ones. Anything else is a single character, whose length is
/// read from its UTF-8 lead byte so a multi-byte symbol stays in one token.
///
/// This function uses no `unsafe`: it compares raw bytes and never reinterprets
/// `bytes` as a `str`, so it is well-defined for any slice and any index. For
/// `i` at or past the end, or a byte that is not a valid UTF-8 lead byte, it
/// returns 1.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    const THREE_BYTE_OPS: [&[u8]; 5] = [b"<<=", b">>=", b">>>", b"...", b"->>"];
    const TWO_BYTE_OPS: [&[u8]; 19] = [
        b"->", b"==", b"!=", b"<=", b">=", b"&&", b"||", b"<<", b">>", b"+=", b"-=", b"*=",
        b"/=", b"%=", b"&=", b"|=", b"^=", b"++", b"--",
    ];

    let rest = bytes.get(i..).unwrap_or(&[]);
    if THREE_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 3;
    }
    if TWO_BYTE_OPS.iter().any(|op| rest.starts_with(op)) {
        return 2;
    }
    // The UTF-8 lead byte encodes the sequence length.
    match rest.first().copied() {
        Some(0xC0..=0xDF) => 2,
        Some(0xE0..=0xEF) => 3,
        Some(0xF0..=0xF7) => 4,
        _ => 1,
    }
}
237
/// Returns the byte at index `i`, or `None` when `i` is out of range.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    if i < bytes.len() {
        Some(bytes[i])
    } else {
        None
    }
}
241
/// Decodes the character that starts at byte offset `i` of `src`.
///
/// Returns `'\0'` when `i` is exactly the end of the string.
///
/// # Panics
///
/// Panics when `i` is past the end of `src` or not on a char boundary,
/// because the string is sliced at `i`.
fn char_at(src: &str, i: usize) -> char {
    match src[i..].chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Minifies `input` with default options and returns the output body.
    fn minified(input: &str) -> String {
        let result = minify(input, &MinifyOptions::default());
        result.unwrap().body
    }

    /// Whitespace between declarations and around operators is removed.
    #[test]
    fn class_with_method() {
        assert_eq!(
            minified(
                "public class Foo {\n public int add(int a, int b) {\n return a + b;\n }\n}\n"
            ),
            "public class Foo{public int add(int a,int b){return a+b;}}"
        );
    }

    /// A `//` comment is dropped by default.
    #[test]
    fn strips_line_comment() {
        assert_eq!(minified("// hi\nint x;\n"), "int x;");
    }

    /// A `/* */` comment is dropped by default.
    #[test]
    fn strips_block_comment() {
        assert_eq!(minified("/* hi */ int x;\n"), "int x;");
    }

    /// `@Name` stays attached and separated from the following word.
    #[test]
    fn annotation_preserved() {
        assert_eq!(
            minified("@Override public void f() {}"),
            "@Override public void f(){}"
        );
    }

    /// An annotation argument list is kept, with the space after `)` removed.
    #[test]
    fn annotation_with_args() {
        assert_eq!(
            minified("@SuppressWarnings(\"unchecked\") void f() {}"),
            "@SuppressWarnings(\"unchecked\")void f(){}"
        );
    }

    /// The inside of a `"""` text block, newlines included, is left untouched.
    #[test]
    fn text_block_preserved() {
        let body = minified("String s = \"\"\"\nhello\nworld\n\"\"\";\n");
        assert!(body.contains("\"\"\"\nhello\nworld\n\"\"\""));
    }

    /// An escaped quote does not end the string literal.
    #[test]
    fn string_with_escape() {
        assert_eq!(minified("String s = \"a\\\"b\";"), "String s=\"a\\\"b\";");
    }

    /// A simple char literal is passed through unchanged.
    #[test]
    fn char_literal() {
        assert_eq!(minified("char c = 'a';"), "char c='a';");
    }

    /// With `keep_comments`, a line comment is emitted as a block comment and
    /// exactly one warning is reported.
    #[test]
    fn keep_comments_converts_line() {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        let result = minify("// hi\nint x;\n", &opts).unwrap();
        assert!(result.body.starts_with("/* hi*/"));
        assert_eq!(result.warnings.len(), 1);
    }

    /// `$` is accepted as an identifier character.
    #[test]
    fn dollar_in_identifier() {
        assert_eq!(minified("int $x = 1;"), "int $x=1;");
    }

    /// A string missing its closing quote is an error.
    #[test]
    fn unterminated_string_errors() {
        let result = minify("String s = \"oops", &MinifyOptions::default());
        assert!(result.is_err());
    }

    /// A block comment missing its `*/` is an error.
    #[test]
    fn unterminated_block_comment_errors() {
        let result = minify("/* unterminated", &MinifyOptions::default());
        assert!(result.is_err());
    }

    /// `->` is kept together as one operator.
    #[test]
    fn lambda_arrow() {
        assert_eq!(minified("x -> x + 1"), "x->x+1");
    }

    /// Generic brackets, including the empty diamond, survive minification.
    #[test]
    fn diamond_operator() {
        assert_eq!(
            minified("List<Integer> xs = new ArrayList<>();"),
            "List<Integer>xs=new ArrayList<>();"
        );
    }
}