minifier/js/
utils.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use crate::js::token::{Keyword, Operation, ReservedChar, Token, Tokens};
4use std::vec::IntoIter;
5
/// Generator of short identifiers made of an optional prefix followed by one
/// or more characters.
///
/// Each generator owns one character; additional characters are provided by a
/// chain of nested generators stored in `lower`.
pub(crate) struct VariableNameGenerator<'a> {
    /// Character contributed by this generator.
    letter: char,
    /// Generator in charge of the characters that follow `letter`, if any.
    lower: Option<Box<VariableNameGenerator<'a>>>,
    /// Text emitted before `letter`.
    prepend: Option<&'a str>,
}

impl<'a> VariableNameGenerator<'a> {
    /// Creates a generator whose first name is `prepend` followed by
    /// `nb_letter` times the letter `a` (a value of `0` is handled like `1`).
    pub(crate) fn new(prepend: Option<&'a str>, nb_letter: usize) -> VariableNameGenerator<'a> {
        // Only the outermost generator carries the prefix.
        let lower = if nb_letter > 1 {
            Some(Box::new(VariableNameGenerator::new(None, nb_letter - 1)))
        } else {
            None
        };
        VariableNameGenerator {
            letter: 'a',
            lower,
            prepend,
        }
    }

    /// Moves on to the following name.
    pub(crate) fn next(&mut self) {
        self.incr_letters();
    }

    /// Returns the current name: prefix, then this generator's character, then
    /// the characters of the nested generators.
    #[allow(clippy::inherent_to_string)]
    pub(crate) fn to_string(&self) -> String {
        let mut name = String::from(self.prepend.unwrap_or(""));
        name.push(self.letter);
        if let Some(lower) = &self.lower {
            name.push_str(&lower.to_string());
        }
        name
    }

    /// Returns the length in bytes of the current name.
    #[allow(dead_code)]
    pub(crate) fn len(&self) -> usize {
        let own = self.prepend.map_or(0, str::len) + 1;
        own + self.lower.as_ref().map_or(0, |lower| lower.len())
    }

    /// Advances this generator's character through `a-z`, `A-Z` and `0-9`.
    ///
    /// When the character wraps from `9` back to `a`, the nested generator is
    /// advanced as well (and created first if there is none yet).
    pub(crate) fn incr_letters(&mut self) {
        let current = self.letter;
        self.letter = match current {
            'z' => 'A',
            'Z' => '0',
            '9' => {
                if let Some(ref mut lower) = self.lower {
                    lower.incr_letters();
                } else {
                    self.lower = Some(Box::new(VariableNameGenerator::new(None, 1)));
                }
                'a'
            }
            other => ((other as u8) + 1) as char,
        };
    }
}
79
80/// Replace given tokens with others.
81///
82/// # Example
83///
84/// ```rust
85/// extern crate minifier;
86/// use minifier::js::{Keyword, Token, replace_tokens_with, simple_minify};
87///
88/// fn main() {
89///     let js = r#"
90///         function replaceByNull(data, func) {
91///             for (var i = 0; i < data.length; ++i) {
92///                 if func(data[i]) {
93///                     data[i] = null;
94///                 }
95///             }
96///         }
97///     }"#.into();
98///     let js_minified = simple_minify(js)
99///         .apply(|f| {
100///             replace_tokens_with(f, |t| {
101///                 if *t == Token::Keyword(Keyword::Null) {
102///                     Some(Token::Other("N"))
103///                 } else {
104///                     None
105///                 }
106///             })
107///         });
108///     println!("{}", js_minified.to_string());
109/// }
110/// ```
111///
112/// The previous code will have all its `null` keywords replaced with `N`. In such cases,
113/// don't forget to include the definition of `N` in the returned minified javascript:
114///
115/// ```js
116/// var N = null;
117/// ```
118#[inline]
119pub fn replace_tokens_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>(
120    mut tokens: Tokens<'a>,
121    callback: F,
122) -> Tokens<'a> {
123    for token in tokens.0.iter_mut() {
124        if let Some(t) = callback(token) {
125            *token = t;
126        }
127    }
128    tokens
129}
130
131/// Replace a given token with another.
132#[inline]
133pub fn replace_token_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>(
134    token: Token<'a>,
135    callback: &F,
136) -> Token<'a> {
137    if let Some(t) = callback(&token) {
138        t
139    } else {
140        token
141    }
142}
143
144/// When looping over `Tokens`, if you encounter `Keyword::Var`, `Keyword::Let` or
145/// `Token::Other` using this function will allow you to get the variable name's
146/// position and the variable value's position (if any).
147///
148/// ## Note
149///
150/// It'll return the value only if there is an `Operation::Equal` found.
151///
152/// # Examples
153///
154/// ```
155/// extern crate minifier;
156/// use minifier::js::{Keyword, get_variable_name_and_value_positions, simple_minify};
157///
158/// fn main() {
159///     let source = r#"var x = 1;var z;var y   =   "2";"#;
160///     let mut result = Vec::new();
161///
162///     let tokens = simple_minify(source);
163///
164///     for pos in 0..tokens.len() {
165///         match tokens[pos].get_keyword() {
166///             Some(k) if k == Keyword::Let || k == Keyword::Var => {
167///                 if let Some(x) = get_variable_name_and_value_positions(&tokens, pos) {
168///                     result.push(x);
169///                 }
170///             }
171///             _ => {}
172///         }
173///     }
174///     assert_eq!(result, vec![(2, Some(6)), (10, None), (14, Some(22))]);
175/// }
176/// ```
177pub fn get_variable_name_and_value_positions<'a>(
178    tokens: &'a Tokens<'a>,
179    pos: usize,
180) -> Option<(usize, Option<usize>)> {
181    if pos >= tokens.len() {
182        return None;
183    }
184    let mut tmp = pos;
185    match tokens[pos] {
186        Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => {
187            tmp += 1;
188        }
189        Token::Other(_) if pos > 0 => {
190            let mut pos = pos - 1;
191            while pos > 0 {
192                if tokens[pos].is_comment() || tokens[pos].is_white_character() {
193                    pos -= 1;
194                } else if tokens[pos] == Token::Char(ReservedChar::Comma)
195                    || tokens[pos] == Token::Keyword(Keyword::Let)
196                    || tokens[pos] == Token::Keyword(Keyword::Var)
197                {
198                    break;
199                } else {
200                    return None;
201                }
202            }
203        }
204        _ => return None,
205    }
206    while tmp < tokens.len() {
207        if tokens[tmp].is_other() {
208            let mut tmp2 = tmp + 1;
209            while tmp2 < tokens.len() {
210                if tokens[tmp2] == Token::Operation(Operation::Equal) {
211                    tmp2 += 1;
212                    while tmp2 < tokens.len() {
213                        let token = &tokens[tmp2];
214                        if token.is_string()
215                            || token.is_other()
216                            || token.is_regex()
217                            || token.is_number()
218                            || token.is_floating_number()
219                        {
220                            return Some((tmp, Some(tmp2)));
221                        } else if !tokens[tmp2].is_comment() && !tokens[tmp2].is_white_character() {
222                            break;
223                        }
224                        tmp2 += 1;
225                    }
226                    break;
227                } else if matches!(
228                    tokens[tmp2].get_char(),
229                    Some(ReservedChar::Comma) | Some(ReservedChar::SemiColon)
230                ) {
231                    return Some((tmp, None));
232                } else if !(tokens[tmp2].is_comment()
233                    || tokens[tmp2].is_white_character()
234                        && tokens[tmp2].get_char() != Some(ReservedChar::Backline))
235                {
236                    break;
237                }
238                tmp2 += 1;
239            }
240        } else {
241            // We don't care about syntax errors.
242        }
243        tmp += 1;
244    }
245    None
246}
247
248#[inline]
249fn get_next<'a>(it: &mut IntoIter<Token<'a>>) -> Option<Token<'a>> {
250    for t in it {
251        if t.is_comment() || t.is_white_character() {
252            continue;
253        }
254        return Some(t);
255    }
256    None
257}
258
259/// Convenient function used to clean useless tokens in a token list.
260///
261/// # Example
262///
263/// ```rust,no_run
264/// extern crate minifier;
265///
266/// use minifier::js::{clean_tokens, simple_minify};
267/// use std::fs;
268///
269/// fn main() {
270///     let content = fs::read("some_file.js").expect("file not found");
271///     let source = String::from_utf8_lossy(&content);
272///     let s = simple_minify(&source); // First we get the tokens list.
273///     let s = s.apply(clean_tokens);  // We now have a cleaned token list!
274///     println!("result: {:?}", s);
275/// }
276/// ```
277#[allow(clippy::collapsible_if)]
278pub fn clean_tokens(tokens: Tokens<'_>) -> Tokens<'_> {
279    let mut v = Vec::with_capacity(tokens.len() / 3 * 2);
280    let mut it = tokens.0.into_iter();
281
282    loop {
283        let token = get_next(&mut it);
284        if token.is_none() {
285            break;
286        }
287        let token = token.unwrap();
288        if token.is_white_character() {
289            continue;
290        } else if token.get_char() == Some(ReservedChar::SemiColon) {
291            if v.is_empty() {
292                continue;
293            }
294            // FIXME: `for` and `while` loops can have their block replaced by a `;`. So as long
295            // as we don't have an AST, we cannot keep this optimization.
296            // if let Some(next) = get_next(&mut it) {
297            //     if next != Token::Char(ReservedChar::CloseCurlyBrace) {
298            //         v.push(token);
299            //     }
300            //     v.push(next);
301            // }
302            // continue;
303        }
304        v.push(token);
305    }
306    v.into()
307}
308
309/// Returns true if the token is a "useful" one (so not a comment or a "useless"
310/// character).
311pub fn clean_token(token: &Token<'_>, next_token: &Option<&Token<'_>>) -> bool {
312    !token.is_comment() && {
313        if let Some(x) = token.get_char() {
314            !x.is_white_character()
315                && (x != ReservedChar::SemiColon
316                    || *next_token != Some(&Token::Char(ReservedChar::CloseCurlyBrace)))
317        } else {
318            true
319        }
320    }
321}
322
323#[inline]
324fn get_next_except<'a, F: Fn(&Token<'a>) -> bool>(
325    it: &mut IntoIter<Token<'a>>,
326    f: &F,
327) -> Option<Token<'a>> {
328    for t in it {
329        if (t.is_comment() || t.is_white_character()) && f(&t) {
330            continue;
331        }
332        return Some(t);
333    }
334    None
335}
336
337/// Same as `clean_tokens` except that if a token is considered as not desired,
338/// the callback is called. If the callback returns `false` as well, it will
339/// be removed.
340///
341/// # Example
342///
343/// ```rust,no_run
344/// extern crate minifier;
345///
346/// use minifier::js::{clean_tokens_except, simple_minify, ReservedChar};
347/// use std::fs;
348///
349/// fn main() {
350///     let content = fs::read("some_file.js").expect("file not found");
351///     let source = String::from_utf8_lossy(&content);
352///     let s = simple_minify(&source); // First we get the tokens list.
353///     let s = s.apply(|f| {
354///         clean_tokens_except(f, |c| {
355///             c.get_char() != Some(ReservedChar::Backline)
356///         })
357///     });  // We now have a cleaned token list which kept backlines!
358///     println!("result: {:?}", s);
359/// }
360/// ```
361pub fn clean_tokens_except<'a, F: Fn(&Token<'a>) -> bool>(tokens: Tokens<'a>, f: F) -> Tokens<'a> {
362    let mut v = Vec::with_capacity(tokens.len() / 3 * 2);
363    let mut it = tokens.0.into_iter();
364
365    loop {
366        let token = get_next_except(&mut it, &f);
367        if token.is_none() {
368            break;
369        }
370        let token = token.unwrap();
371        if token.is_white_character() {
372            if f(&token) {
373                continue;
374            }
375        } else if token.get_char() == Some(ReservedChar::SemiColon) {
376            if v.is_empty() {
377                if !f(&token) {
378                    v.push(token);
379                }
380                continue;
381            }
382            if let Some(next) = get_next_except(&mut it, &f) {
383                if next != Token::Char(ReservedChar::CloseCurlyBrace) || !f(&token) {
384                    v.push(token);
385                }
386                v.push(next);
387            } else if !f(&token) {
388                v.push(token);
389            }
390            continue;
391        }
392        v.push(token);
393    }
394    v.into()
395}
396
397/// Returns true if the token is a "useful" one (so not a comment or a "useless"
398/// character).
399#[inline]
400pub fn clean_token_except<'a, F: Fn(&Token<'a>) -> bool>(
401    token: &Token<'a>,
402    next_token: &Option<&Token<'_>>,
403    f: &F,
404) -> bool {
405    if !clean_token(token, next_token) {
406        !f(token)
407    } else {
408        true
409    }
410}
411
412pub(crate) fn get_array<'a>(
413    tokens: &'a Tokens<'a>,
414    array_name: &str,
415) -> Option<(Vec<usize>, usize)> {
416    let mut ret = Vec::new();
417
418    let mut looking_for_var = false;
419    let mut looking_for_equal = false;
420    let mut looking_for_array_start = false;
421    let mut getting_values = false;
422
423    for pos in 0..tokens.len() {
424        if looking_for_var {
425            match tokens[pos] {
426                Token::Other(s) => {
427                    looking_for_var = false;
428                    if s == array_name {
429                        looking_for_equal = true;
430                    }
431                }
432                ref s => {
433                    looking_for_var = s.is_comment() || s.is_white_character();
434                }
435            }
436        } else if looking_for_equal {
437            match tokens[pos] {
438                Token::Operation(Operation::Equal) => {
439                    looking_for_equal = false;
440                    looking_for_array_start = true;
441                }
442                ref s => {
443                    looking_for_equal = s.is_comment() || s.is_white_character();
444                }
445            }
446        } else if looking_for_array_start {
447            match tokens[pos] {
448                Token::Char(ReservedChar::OpenBracket) => {
449                    looking_for_array_start = false;
450                    getting_values = true;
451                }
452                ref s => {
453                    looking_for_array_start = s.is_comment() || s.is_white_character();
454                }
455            }
456        } else if getting_values {
457            match &tokens[pos] {
458                Token::Char(ReservedChar::CloseBracket) => {
459                    return Some((ret, pos));
460                }
461                s if s.is_comment() || s.is_white_character() => {}
462                _ => {
463                    ret.push(pos);
464                }
465            }
466        } else {
467            match tokens[pos] {
468                Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => {
469                    looking_for_var = true;
470                }
471                _ => {}
472            }
473        }
474    }
475    None
476}
477
#[test]
fn check_get_array() {
    let source = r#"var x = [  ]; var y = ['hello',
    12]; var z = []; var w = 12;"#;

    let tokens = crate::js::token::tokenize(source);

    // Arrays which must be found, with the expected position of their closing bracket.
    for &(name, closing_bracket) in &[("x", 9usize), ("y", 27), ("z", 37)] {
        let ar = get_array(&tokens, name);
        assert!(ar.is_some());
        assert_eq!(ar.unwrap().1, closing_bracket);
    }

    // `w` isn't an array and `W` doesn't exist.
    for &name in &["w", "W"] {
        let ar = get_array(&tokens, name);
        assert!(ar.is_none());
    }
}
503
#[test]
fn check_get_variable_name_and_value_positions() {
    // Goes through all the positions and collects every result. After a match, the
    // walk resumes right after the position of the variable name.
    fn collect_positions<'a>(tokens: &'a Tokens<'a>) -> Vec<(usize, Option<usize>)> {
        let mut found = Vec::new();
        let mut pos = 0;

        while pos < tokens.len() {
            if let Some(hit) = get_variable_name_and_value_positions(tokens, pos) {
                found.push(hit);
                pos = hit.0;
            }
            pos += 1;
        }
        found
    }

    let source = r#"var x = 1;var y   =   "2",we=4;"#;

    // Raw token list.
    let tokens = crate::js::token::tokenize(source);
    assert_eq!(
        collect_positions(&tokens),
        vec![(2, Some(6)), (10, Some(18)), (20, Some(22))]
    );

    // Cleaned token list.
    let tokens = crate::js::clean_tokens(tokens);
    assert_eq!(
        collect_positions(&tokens),
        vec![(1, Some(3)), (6, Some(8)), (10, Some(12))]
    );
}
534
#[test]
fn replace_tokens() {
    let source = r#"
var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}];
var n = null;
"#;
    let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N;";

    // Minify and clean first, then swap every `null` keyword for `N`.
    let cleaned = crate::js::simple_minify(source).apply(crate::js::clean_tokens);
    let res = cleaned.apply(|tokens| {
        replace_tokens_with(tokens, |token| {
            let is_null = *token == Token::Keyword(Keyword::Null);
            if is_null {
                Some(Token::Other("N"))
            } else {
                None
            }
        })
    });
    assert_eq!(res.to_string(), expected_result);
}
556
#[test]
fn check_iterator() {
    let source = r#"
var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}];
var n = null;
"#;
    let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N;";

    let mut stream = crate::js::simple_minify(source).into_iter().peekable();
    let mut kept = Vec::new();
    loop {
        let token = match stream.next() {
            Some(token) => token,
            None => break,
        };
        // The following token is needed to decide whether a `;` is useful.
        if !crate::js::clean_token(&token, &stream.peek()) {
            continue;
        }
        let is_null = token == Token::Keyword(Keyword::Null);
        kept.push(if is_null { Token::Other("N") } else { token });
    }
    let result: Tokens<'_> = kept.into();
    assert_eq!(result.to_string(), expected_result);
}
578}