1use super::c_common::{Token, TokenKind, emit_aggressive};
17use super::{MinifyError, MinifyOptions, MinifyOutput};
18
19pub fn minify(source: &str, opts: &MinifyOptions) -> Result<MinifyOutput, MinifyError> {
20 let toks = tokenize(source)?;
21 emit_aggressive(&toks, opts.keep_comments)
22}
23
24fn tokenize(src: &str) -> Result<Vec<Token<'_>>, MinifyError> {
25 let bytes = src.as_bytes();
26 let mut out: Vec<Token<'_>> = Vec::new();
27 let mut i = 0usize;
28 while i < bytes.len() {
29 let c = bytes[i];
30 if matches!(c, b' ' | b'\t' | b'\r') {
31 i += 1;
32 continue;
33 }
34 if c == b'\n' {
35 out.push(Token::new(TokenKind::Newline));
36 i += 1;
37 continue;
38 }
39 if c == b'-' && peek(bytes, i + 1) == Some(b'-') {
40 let start = i + 2;
41 let mut j = start;
42 while j < bytes.len() && bytes[j] != b'\n' {
43 j += 1;
44 }
45 out.push(Token::new(TokenKind::LineComment(&src[start..j])));
46 i = j;
47 continue;
48 }
49 if c == b'/' && peek(bytes, i + 1) == Some(b'*') {
50 let body_start = i + 2;
52 let mut j = body_start;
53 let mut depth = 1usize;
54 while j < bytes.len() {
55 if bytes[j] == b'/' && peek(bytes, j + 1) == Some(b'*') {
56 depth += 1;
57 j += 2;
58 continue;
59 }
60 if bytes[j] == b'*' && peek(bytes, j + 1) == Some(b'/') {
61 depth -= 1;
62 if depth == 0 {
63 let body = &src[body_start..j];
64 out.push(Token::new(TokenKind::BlockComment(body)));
65 i = j + 2;
66 break;
67 }
68 j += 2;
69 continue;
70 }
71 j += 1;
72 }
73 if depth != 0 {
74 return Err(MinifyError::new("unterminated /* */ block comment"));
75 }
76 continue;
77 }
78 if c == b'\'' {
79 let n = scan_sq_string(src, i)?;
80 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
81 i += n;
82 continue;
83 }
84 if c == b'"' {
85 let n = scan_quoted_ident(src, i)?;
86 out.push(Token::new(TokenKind::StrLit(&src[i..i + n])));
87 i += n;
88 continue;
89 }
90 if c == b'$' {
91 if let Some((tag_end, body_end)) = try_scan_dollar_quoted(bytes, i) {
94 out.push(Token::new(TokenKind::StrLit(&src[i..body_end])));
95 i = body_end;
96 let _ = tag_end;
97 continue;
98 }
99 let mut j = i + 1;
101 while j < bytes.len() && bytes[j].is_ascii_digit() {
102 j += 1;
103 }
104 if j > i + 1 {
105 out.push(Token::new(TokenKind::Word(&src[i..j])));
106 i = j;
107 continue;
108 }
109 out.push(Token::new(TokenKind::Punct(&src[i..i + 1])));
111 i += 1;
112 continue;
113 }
114 if is_word_start(src, i) {
115 let n = scan_word(src, i);
116 out.push(Token::new(TokenKind::Word(&src[i..i + n])));
117 i += n;
118 continue;
119 }
120 let n = scan_multi_punct(bytes, i);
121 out.push(Token::new(TokenKind::Punct(&src[i..i + n])));
122 i += n;
123 }
124 Ok(out)
125}
126
127fn scan_sq_string(src: &str, i: usize) -> Result<usize, MinifyError> {
128 let bytes = src.as_bytes();
129 debug_assert_eq!(bytes[i], b'\'');
130 let mut j = i + 1;
131 while j < bytes.len() {
132 if bytes[j] == b'\'' {
133 if peek(bytes, j + 1) == Some(b'\'') {
135 j += 2;
136 continue;
137 }
138 return Ok(j + 1 - i);
139 }
140 j += 1;
141 }
142 Err(MinifyError::new("unterminated string literal"))
143}
144
145fn scan_quoted_ident(src: &str, i: usize) -> Result<usize, MinifyError> {
146 let bytes = src.as_bytes();
147 debug_assert_eq!(bytes[i], b'"');
148 let mut j = i + 1;
149 while j < bytes.len() {
150 if bytes[j] == b'"' {
151 if peek(bytes, j + 1) == Some(b'"') {
152 j += 2;
153 continue;
154 }
155 return Ok(j + 1 - i);
156 }
157 j += 1;
158 }
159 Err(MinifyError::new("unterminated quoted identifier"))
160}
161
/// Tries to read a dollar-quoted literal (`$tag$ ... $tag$`) starting at `i`.
///
/// `bytes[i]` must be `$`. The tag is the (possibly empty) run of ASCII
/// alphanumerics and underscores between the first two `$`. The literal ends
/// at the first `$tag$` found after the opening delimiter.
///
/// Returns `Some((tag_end, end))`, where `tag_end` is the index of the `$`
/// that closes the opening delimiter and `end` is the index just past the
/// closing delimiter. Returns `None` when the text at `i` is not an opening
/// delimiter or when no matching closing delimiter exists.
fn try_scan_dollar_quoted(bytes: &[u8], i: usize) -> Option<(usize, usize)> {
    debug_assert_eq!(bytes[i], b'$');
    let tag_start = i + 1;
    let rest = bytes.get(tag_start..)?;

    // The first byte that cannot belong to a tag must be the `$` that ends the
    // opening delimiter; anything else (or end of input) means "not a literal".
    let tag_len = rest
        .iter()
        .position(|&b| !(b.is_ascii_alphanumeric() || b == b'_'))?;
    if rest[tag_len] != b'$' {
        return None;
    }
    let tag = &rest[..tag_len];
    let tag_end = tag_start + tag_len;
    let body_start = tag_end + 1;

    // Search the remainder for the first `$` + tag + `$`. The search starts
    // after the opening delimiter, so the two delimiters never share a `$`.
    let delim_len = tag_len + 2;
    let close = bytes[body_start..].windows(delim_len).position(|w| {
        w[0] == b'$' && w[delim_len - 1] == b'$' && &w[1..1 + tag_len] == tag
    })?;

    Some((tag_end, body_start + close + delim_len))
}
205
206fn is_word_start(src: &str, i: usize) -> bool {
207 let c = char_at(src, i);
208 c.is_alphabetic() || c == '_' || c.is_ascii_digit()
209}
210
211fn scan_word(src: &str, i: usize) -> usize {
212 let bytes = src.as_bytes();
213 let mut j = i;
214 while j < bytes.len() {
215 let c = char_at(src, j);
216 if c.is_alphanumeric() || c == '_' {
217 j += c.len_utf8();
218 continue;
219 }
220 if c == '.' {
221 let next = peek(bytes, j + 1);
223 if matches!(next, Some(b'0'..=b'9')) && j > i {
224 j += 1;
225 continue;
226 }
227 }
228 break;
229 }
230 j - i
231}
232
/// Returns the byte length of the punctuation token starting at `i`.
///
/// The two-byte operators `<=`, `>=`, `<>`, `!=`, `||` and `::` are returned
/// as a unit (length 2). Otherwise the result is the encoded length of the
/// single UTF-8 character at `i`, derived from its lead byte. If `i` is at or
/// past the end of `bytes`, or points at a byte that is not a valid lead
/// byte, the result is 1.
///
/// This works on raw bytes without `unsafe`, so it is sound for any input
/// slice, not only for bytes taken from a `str`.
fn scan_multi_punct(bytes: &[u8], i: usize) -> usize {
    const TWO_BYTE_OPS: [&[u8]; 6] = [b"<=", b">=", b"<>", b"!=", b"||", b"::"];

    if let Some(pair) = bytes.get(i..i + 2) {
        if TWO_BYTE_OPS.contains(&pair) {
            return 2;
        }
    }

    // The lead byte of a UTF-8 sequence encodes the sequence length.
    match bytes.get(i) {
        Some(0xC0..=0xDF) => 2,
        Some(0xE0..=0xEF) => 3,
        Some(0xF0..=0xF7) => 4,
        _ => 1,
    }
}
244
/// Returns the byte at index `i`, or `None` when `i` is out of bounds.
fn peek(bytes: &[u8], i: usize) -> Option<u8> {
    let byte = bytes.get(i)?;
    Some(*byte)
}
248
/// Decodes the character that starts at byte offset `i` of `src`.
///
/// Yields `'\0'` when `i` equals `src.len()`.
///
/// # Panics
///
/// Panics if `i` is past the end of `src` or not on a character boundary.
fn char_at(src: &str, i: usize) -> char {
    let tail = &src[i..];
    match tail.chars().next() {
        Some(ch) => ch,
        None => '\0',
    }
}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Minifies `s` with default options and returns the output body,
    /// panicking if minification fails.
    fn min(s: &str) -> String {
        let output = minify(s, &MinifyOptions::default());
        output.unwrap().body
    }

    // Newlines and redundant spaces collapse; spaces between words stay.
    #[test]
    fn select_with_whitespace() {
        assert_eq!(
            min("SELECT *\n FROM users\n WHERE id = 1;"),
            "SELECT*FROM users WHERE id=1;"
        );
    }

    // With default options a `--` comment leaves no trace.
    #[test]
    fn line_comment_stripped() {
        assert_eq!(min("-- comment\nSELECT 1;"), "SELECT 1;");
    }

    // With default options a `/* */` comment leaves no trace.
    #[test]
    fn block_comment_stripped() {
        assert_eq!(min("/* hi */ SELECT 1;"), "SELECT 1;");
    }

    // The outer comment only ends at the `*/` that balances its own `/*`.
    #[test]
    fn nested_block_comment() {
        assert_eq!(min("/* outer /* inner */ outer */ SELECT 1;"), "SELECT 1;");
    }

    // `''` inside a string literal is an escaped quote, not a terminator.
    #[test]
    fn doubled_quote_in_string() {
        assert_eq!(min("SELECT 'O''Brien';"), "SELECT'O''Brien';");
    }

    // Whitespace inside a double-quoted identifier is preserved.
    #[test]
    fn quoted_identifier() {
        assert_eq!(
            min("SELECT \"my col\" FROM t;"),
            "SELECT\"my col\"FROM t;"
        );
    }

    // The body of a `$$ ... $$` literal is passed through untouched.
    #[test]
    fn dollar_quoted_string() {
        let out = min("DO $$ BEGIN RAISE NOTICE 'hi'; END $$;");
        assert!(
            out.contains("$$ BEGIN RAISE NOTICE 'hi'; END $$"),
            "{}",
            out
        );
    }

    // Same as above, with a named tag.
    #[test]
    fn dollar_quoted_with_tag() {
        let out = min("SELECT $tag$ raw \"text\" $tag$;");
        assert!(out.contains("$tag$ raw \"text\" $tag$"));
    }

    // `$1` survives as a single unit.
    #[test]
    fn positional_param() {
        assert_eq!(
            min("SELECT * FROM t WHERE id = $1;"),
            "SELECT*FROM t WHERE id=$1;"
        );
    }

    // With `keep_comments`, the line comment shows up in block form at the
    // start of the body and exactly one warning is reported.
    #[test]
    fn keep_comments_converts() {
        let opts = MinifyOptions {
            keep_comments: true,
        };
        let r = minify("-- hi\nSELECT 1;", &opts).unwrap();
        assert!(r.body.starts_with("/* hi*/"));
        assert_eq!(r.warnings.len(), 1);
    }

    #[test]
    fn unterminated_string() {
        let result = minify("SELECT 'oops", &MinifyOptions::default());
        assert!(result.is_err());
    }

    #[test]
    fn unterminated_block_comment() {
        let result = minify("/* unterminated", &MinifyOptions::default());
        assert!(result.is_err());
    }

    // Keyword and identifier casing is left as written.
    #[test]
    fn case_preservation() {
        assert_eq!(min("select Foo from Bar;"), "select Foo from Bar;");
    }

    // `--` directly after a number still opens a comment: everything from
    // `--` to the end of the line is dropped.
    #[test]
    fn double_dash_only_at_start_of_word() {
        assert_eq!(min("SELECT 5--1\nFROM t;"), "SELECT 5 FROM t;");
    }
}