Skip to main content

brief/minify/
c_common.rs

1//! Shared building blocks for the C-family minifiers (Rust, Go, JS/TS,
2//! Java, C/C++, SQL). Each language module produces a `Vec<Token>` with its
3//! own lexer; a shared emitter consumes the stream to produce a minified
4//! string with minimal-but-safe whitespace.
5
6#![allow(dead_code)]
7
8use super::{MinifyError, MinifyOutput, MinifyWarning};
9
/// The lexical category of a [`Token`], carrying the token's text where it
/// has any.
///
/// Every payload is a borrowed `&'a str`, so building a token never copies
/// text. The type is comparable (`PartialEq`/`Eq`) so that lexer output can
/// be checked directly with `==` and `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind<'a> {
    /// Identifier, keyword, or numeric literal — anything that requires a
    /// word-boundary against another adjacent word.
    Word(&'a str),
    /// Operator or punctuation. May be 1+ characters; multi-character forms
    /// like `===`, `??`, `=>`, `->`, `::` are emitted as a single Punct so
    /// the dangerous-pair table doesn't have to pull them apart again.
    Punct(&'a str),
    /// String/char literal — emitted verbatim, including delimiters.
    StrLit(&'a str),
    /// `// …` line comment body (without the leading `//` or trailing
    /// newline). The minifier emits or drops based on `keep_comments`.
    LineComment(&'a str),
    /// `/* … */` block comment body (without the surrounding delimiters).
    BlockComment(&'a str),
    /// JS template literal `` `…` `` — verbatim, including backticks.
    Template(&'a str),
    /// JS regex literal `/…/flags` — verbatim.
    Regex(&'a str),
    /// C/C++ preprocessor line, including the leading `#` and trailing line
    /// continuations. Emitted on its own line.
    Preproc(&'a str),
    /// Significant newline (used by Strategy B emitters to preserve ASI).
    Newline,
}
36
37#[derive(Debug, Clone)]
38pub struct Token<'a> {
39    pub kind: TokenKind<'a>,
40}
41
42impl<'a> Token<'a> {
43    pub fn new(kind: TokenKind<'a>) -> Self {
44        Token { kind }
45    }
46}
47
/// Reports whether `c` counts as a "word" character.
///
/// A word character is one that, written directly next to another word
/// character, would merge with it into a single longer token. That covers
/// `_`, `$`, and anything Unicode classifies as alphanumeric — so
/// identifiers in non-Latin scripts are handled the same way as ASCII ones.
pub fn is_word_char(c: char) -> bool {
    matches!(c, '_' | '$') || c.is_alphanumeric()
}
55
56/// True if removing whitespace between two adjacent characters would change
57/// the token stream (form a multi-character operator, comment marker, or
58/// merge two words). The caller passes the last char of the previously
59/// emitted token and the first char of the next token.
60pub fn needs_space(prev: char, next: char) -> bool {
61    if is_word_char(prev) && is_word_char(next) {
62        return true;
63    }
64    // The pairs that, if joined, become a different lexical token.
65    matches!(
66        (prev, next),
67        ('+', '+')
68            | ('-', '-')
69            | ('<', '<')
70            | ('>', '>')
71            | ('*', '*')
72            | ('/', '/')
73            | ('/', '*')
74            | ('*', '/')
75            | (':', ':')
76            | ('&', '&')
77            | ('|', '|')
78            | ('=', '=')
79            | ('!', '=')
80            | ('<', '=')
81            | ('>', '=')
82            | ('+', '=')
83            | ('-', '=')
84            | ('*', '=')
85            | ('/', '=')
86            | ('%', '=')
87            | ('&', '=')
88            | ('|', '=')
89            | ('^', '=')
90            | ('-', '>')
91            | ('=', '>')
92            | ('?', '?')
93            | ('?', '.')
94            | ('.', '.')
95    )
96}
97
/// Returns the final `char` of `s`, or `None` when `s` is empty.
fn last_char(s: &str) -> Option<char> {
    let mut chars = s.chars();
    chars.next_back()
}
/// Returns the leading `char` of `s`, or `None` when `s` is empty.
fn first_char(s: &str) -> Option<char> {
    let mut chars = s.chars();
    chars.next()
}
104
105/// Emit a token stream stripping all whitespace and (default) all comments.
106/// Used by Rust, Java, SQL.
107pub fn emit_aggressive(
108    tokens: &[Token<'_>],
109    opts_keep_comments: bool,
110) -> Result<MinifyOutput, MinifyError> {
111    let mut out = String::new();
112    let mut warnings: Vec<MinifyWarning> = Vec::new();
113    let mut prev_emit_last: Option<char> = None;
114    for tok in tokens {
115        match &tok.kind {
116            TokenKind::Newline => {}
117            TokenKind::LineComment(body) => {
118                if !opts_keep_comments {
119                    continue;
120                }
121                let block = format!("/*{}*/", body);
122                push_with_space(&mut out, &mut prev_emit_last, &block);
123                warnings.push(MinifyWarning::LineCommentConverted);
124            }
125            TokenKind::BlockComment(body) => {
126                if !opts_keep_comments {
127                    continue;
128                }
129                let block = format!("/*{}*/", body);
130                push_with_space(&mut out, &mut prev_emit_last, &block);
131            }
132            TokenKind::Word(s)
133            | TokenKind::Punct(s)
134            | TokenKind::StrLit(s)
135            | TokenKind::Template(s)
136            | TokenKind::Regex(s) => {
137                push_with_space(&mut out, &mut prev_emit_last, s);
138            }
139            TokenKind::Preproc(s) => {
140                if !out.is_empty() && !out.ends_with('\n') {
141                    out.push('\n');
142                }
143                out.push_str(s);
144                if !s.ends_with('\n') {
145                    out.push('\n');
146                }
147                prev_emit_last = None;
148            }
149        }
150    }
151    Ok(MinifyOutput {
152        body: out,
153        warnings,
154    })
155}
156
157/// Emit a token stream preserving newlines (so JS/TS/Go ASI behavior is
158/// preserved). Horizontal whitespace and comments are still stripped.
159pub fn emit_conservative(
160    tokens: &[Token<'_>],
161    opts_keep_comments: bool,
162) -> Result<MinifyOutput, MinifyError> {
163    let mut out = String::new();
164    let mut warnings: Vec<MinifyWarning> = Vec::new();
165    let mut prev_emit_last: Option<char> = None;
166    for tok in tokens {
167        match &tok.kind {
168            TokenKind::Newline => {
169                // Collapse runs of newlines down to one — leading newlines
170                // from prior comments shouldn't pile up.
171                if !out.ends_with('\n') {
172                    out.push('\n');
173                }
174                prev_emit_last = None;
175            }
176            TokenKind::LineComment(body) => {
177                if !opts_keep_comments {
178                    continue;
179                }
180                let block = format!("/*{}*/", body);
181                push_with_space(&mut out, &mut prev_emit_last, &block);
182                warnings.push(MinifyWarning::LineCommentConverted);
183            }
184            TokenKind::BlockComment(body) => {
185                if !opts_keep_comments {
186                    continue;
187                }
188                let block = format!("/*{}*/", body);
189                push_with_space(&mut out, &mut prev_emit_last, &block);
190            }
191            TokenKind::Word(s)
192            | TokenKind::Punct(s)
193            | TokenKind::StrLit(s)
194            | TokenKind::Template(s)
195            | TokenKind::Regex(s) => {
196                push_with_space(&mut out, &mut prev_emit_last, s);
197            }
198            TokenKind::Preproc(s) => {
199                // Preprocessor isn't really a JS/Go thing but for symmetry:
200                if !out.is_empty() && !out.ends_with('\n') {
201                    out.push('\n');
202                }
203                out.push_str(s);
204                if !s.ends_with('\n') {
205                    out.push('\n');
206                }
207                prev_emit_last = None;
208            }
209        }
210    }
211    Ok(MinifyOutput {
212        body: out,
213        warnings,
214    })
215}
216
217fn push_with_space(out: &mut String, prev_emit_last: &mut Option<char>, s: &str) {
218    if s.is_empty() {
219        return;
220    }
221    if let Some(prev) = *prev_emit_last {
222        if let Some(next) = first_char(s) {
223            if needs_space(prev, next) {
224                out.push(' ');
225            }
226        }
227    }
228    out.push_str(s);
229    *prev_emit_last = last_char(s);
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `needs_space` returns `expected` for every pair listed.
    fn check_all(pairs: &[(char, char)], expected: bool) {
        for &(prev, next) in pairs {
            assert_eq!(
                needs_space(prev, next),
                expected,
                "needs_space({:?}, {:?})",
                prev,
                next
            );
        }
    }

    // Two word characters side by side would merge into one word.
    #[test]
    fn word_word_needs_space() {
        check_all(&[('a', 'b'), ('1', 'x'), ('_', 'a')], true);
    }

    // A word next to plain punctuation never merges.
    #[test]
    fn word_punct_no_space() {
        check_all(&[('a', '('), ('1', ';'), (')', '{')], false);
    }

    // Punctuation pairs that would form a longer operator or comment marker.
    #[test]
    fn dangerous_pairs_need_space() {
        check_all(
            &[
                ('+', '+'),
                ('-', '-'),
                ('/', '/'),
                ('/', '*'),
                ('=', '='),
                ('!', '='),
                ('<', '='),
                (':', ':'),
                ('&', '&'),
                ('|', '|'),
                ('-', '>'),
                ('=', '>'),
                ('.', '.'),
            ],
            true,
        );
    }

    // Punctuation pairs that stay distinct tokens when adjacent.
    #[test]
    fn safe_punct_pairs_no_space() {
        check_all(
            &[
                ('(', '{'),
                (',', ' '),
                (';', '}'),
                (')', ';'),
                ('+', 'a'),
                ('a', ')'),
            ],
            false,
        );
    }
}
276}