// sqlparse/engine/grouping.rs

1use std::convert::From;
2use std::fmt;
3use crate::lexer::{Token, TokenList, tokenize, remove_quotes};
4use crate::tokens::TokenType;
5
// Token-type families used by the grouping passes when testing whether a
// neighbouring token is "operand-like": numeric literals, string literals,
// and name-like tokens respectively.
const T_NUMERICAL: [TokenType; 3] = [TokenType::Number, TokenType::NumberInteger, TokenType::NumberFloat];
const T_STRING: [TokenType; 3] = [TokenType::String, TokenType::StringSingle, TokenType::StringSymbol];
const T_NAME: [TokenType; 2] = [TokenType::Name, TokenType::NamePlaceholder];
9
// Recursively apply a grouping pass to the children of every group token:
// `sub_group!(self, group_x)` calls `children.group_x()` on each group.
macro_rules! sub_group {
    ($self:ident, $fn_name:ident) => {
        for token in $self.tokens.iter_mut() {
            if token.is_group() {
                token.children.$fn_name();
            }
        }
    };
}
19
20pub fn group(tokens: Vec<Token>) -> Vec<Token> {
21    let mut token_list = TokenList::new(tokens);
22    token_list.group();
23    token_list.tokens
24}
25
26/// parse sql into grouped TokenList.
27/// only for test
28pub fn group_tokenlist(sql: &str) -> TokenList {
29    let mut token_list = TokenList::from(sql);
30    token_list.group();
31    token_list
32}
33
34impl From<&str> for TokenList {
35    
36    fn from(sql: &str) -> Self {
37        let tokens = tokenize(sql);
38        TokenList::new(tokens)
39    }
40}
41
42impl std::fmt::Display for TokenList {
43
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        for token in &self.tokens {
46            writeln!(f, "{:?}", token)?;
47        };
48        Ok(())
49    }
50}
51
52// TODO: GroupToken
53impl TokenList {
54
55    /// Create a new TokenList
56    pub fn new(tokens: Vec<Token>) -> Self {
57        // let group_tokens = tokens.into_iter().map(|t| t.into()).collect();
58        Self { tokens: tokens }
59    }
60
    /// Half-open index range `(start, end)` of this list's tokens that may
    /// themselves take part in grouping; for a parenthesis/bracket group the
    /// surrounding open/close pair is excluded.
    /// NOTE(review): `self.len()-1` would underflow on an empty list —
    /// callers appear to pass a token belonging to a non-empty list; confirm.
    pub fn groupable_tokens(&self, token: &Token) -> (usize, usize) {
        match token.typ {
            TokenType::Parenthesis | TokenType::SquareBrackets => (1, self.len()-1),
            TokenType::Punctuation if token.value == "(" || token.value == "[" => (1, self.len()-1),
            _ => (0, self.len())
        }
    }
70
    /// Number of top-level tokens in this list.
    pub fn len(&self) -> usize {
        self.tokens.len()
    }
74
75    // join first n value
76    pub fn take_value(&self, idx: usize) -> String {
77        self.tokens.iter().take(idx).map(|t| t.value.as_str()).collect::<Vec<&str>>().join("")
78    }
79
80    fn token_matching(&self, types: &[TokenType], pattern: Option<&(TokenType, Vec<&str>)>, start: usize, end: usize) -> Option<usize> {
81        let pos = if types.len() > 0 {
82            self.tokens[start..end].iter()
83                .position(|token| types.iter().find(|t| **t == token.typ).is_some())
84        } else if let Some(p) = pattern {
85            self.tokens[start..end].iter()
86                .position(|token| p.0 == token.typ && p.1.iter().find(|v| **v == token.normalized).is_some())
87        } else {
88            None
89        };
90        pos.map(|p| p+start)
91    }
92
93    //  fn token_matching_fn(&self, f: fn(&Token) -> bool, start: usize, end: usize, reverse: bool) -> Option<usize> {
94    fn token_matching_fn<F>(&self, f: F, start: usize, end: usize, reverse: bool) -> Option<usize> where F: Fn(&Token) -> bool {
95        if reverse {
96            self.tokens[start..end].iter().rposition(|token| f(token)).map(|p| p+start)
97        } else {
98            self.tokens[start..end].iter().position(|token| f(token)).map(|p| p+start)
99        }
100    }
101
102    fn token_not_matching_fn<F>(&self, f: F, start: usize, end: usize, reverse: bool) -> Option<usize> where F: Fn(&Token) -> bool {
103        if reverse {
104            self.tokens[start..end].iter().rposition(|token| !f(token)).map(|p| p+start)
105        } else {
106            self.tokens[start..end].iter().position(|token| !f(token)).map(|p| p+start)
107        }
108    }
109
    /// First token at or after `start` matching `types` (or `pattern` when
    /// `types` is empty); see `token_matching` for the matching rules.
    pub fn token_next_by(&self, types: &[TokenType], pattern: Option<&(TokenType, Vec<&str>)>,start: usize) -> Option<usize> {
        self.token_matching(types, pattern, start, self.tokens.len())
    }
113
    /// First token at or after `start` satisfying predicate `f`.
    pub fn token_next_by_fn<F>(&self, f: F, start: usize) -> Option<usize> where F: Fn(&Token) -> bool {
        self.token_matching_fn(f, start, self.tokens.len(), false)
    }
117
118    pub fn token_next(&self, idx: usize, skip_ws: bool) -> Option<usize> {
119        if skip_ws { self.token_matching_fn(|t| !t.is_whitespace(), idx, self.len(), false) }
120        else { self.token_matching_fn(|_| true, idx, self.len(), false) } 
121        // return self.token_matching_fn(|t| !t.is_whitespace(), idx, self.len(), false);
122    }
123
124    // default skip_ws = true
125    pub fn token_prev(&self, idx: usize, skip_ws: bool) -> Option<usize> {
126        if idx > self.len() || idx == 0 { None } 
127        else if skip_ws { self.token_matching_fn(|t| !t.is_whitespace(), 0, idx, true) }
128        else {  self.token_matching_fn(|_| true, 0, idx, true) }
129    }
130
131    pub fn token_idx(&self, idx: Option<usize>) -> Option<&Token> {
132        idx.map(|i| self.tokens.get(i)).flatten()
133    }
134
    /// Append `tokens` to the end of this list.
    pub fn extend(&mut self, tokens: Vec<Token>) {
        self.tokens.extend(tokens)
    }
138
    /// Collapse tokens in `[start, end)` into a single group token of
    /// `group_type`. With `extend == true` and `tokens[start]` already of
    /// that type, the following tokens are folded into the existing group
    /// instead of creating a new parent.
    fn group_tokens(&mut self, group_type: TokenType, start: usize, end: usize, extend: bool) {
        if extend && self.tokens[start].typ == group_type {
            let start_idx = start;
            // absorb the rest of the range into the existing group's children
            let sub_tokens = self.tokens[start_idx+1..end].to_vec();
            let start = &mut self.tokens[start_idx];
            start.children.extend(sub_tokens);
            // refresh the cached string value of the grown group
            start.value = Token::new_value(&start.children.tokens);
            // drop the absorbed tokens from the top level
            self.tokens.splice(start_idx+1..end, []).for_each(drop);
            return
        }
        // replace the range with a single new parent token
        let sub_tokens = self.tokens[start..end].to_vec();
        let group_token = vec![Token::new_parent(group_type, sub_tokens)];
        self.tokens.splice(start..end, group_token).for_each(drop);
    }
154
    /// Insert `token` so that it appears just before position `index`.
    pub fn insert_before(&mut self, index: usize, token: Token){
        self.tokens.insert(index, token)
    }
158
159    // insert newline and remove before space
160    // return true if any space removed
161    pub fn insert_newline_before(&mut self, index: usize, token: Token) -> bool {
162        if index > 0 && self.tokens.get(index-1).map(|t| t.is_whitespace()).unwrap_or(false) {
163            self.tokens[index-1] = token;
164            true
165        } else {
166            self.tokens.insert(index, token);
167            false
168        }
169    }
170
171    pub fn insert_after(&mut self, index: usize, token: Token, skip_ws: bool) {
172        let nidx = self.token_next(index+1, skip_ws);
173        if let Some(idx) = nidx {
174            self.tokens.insert(idx, token)
175        } else {
176            self.tokens.push(token)
177        }
178    }
179
    /// Insert a newline token after `index`: it lands just before the next
    /// token (skipping whitespace when `skip_ws`), replacing the token in
    /// front of that position when it is whitespace; appends at the end
    /// when there is no following token.
    /// Returns `true` if a whitespace token was replaced.
    pub fn insert_newline_after(&mut self, index: usize, token: Token, skip_ws: bool) -> bool {
        let nidx = self.token_next(index+1, skip_ws);
        let mut whitespace_removed = false;
        if let Some(idx) = nidx {
            // idx >= index+1 >= 1 here, so idx-1 cannot underflow
            let ptoken = self.token_idx(Some(idx-1));
            if ptoken.map(|t| t.is_whitespace()).unwrap_or(false) {
                self.tokens[idx-1] = token;
                whitespace_removed = true;
            } else {
                self.tokens.insert(idx, token)
            }
        } else {
            self.tokens.push(token)
        };
        whitespace_removed
    }
197
    /// Split a CASE expression into (condition-indices, value-indices)
    /// pairs: one pair per WHEN/THEN arm plus one for an ELSE branch.
    /// `mode` tracks what is being collected: 1 = condition tokens,
    /// 2 = value tokens, 0 = finished (END was seen).
    pub fn get_case(&self, skip_ws: bool) -> Vec<(Vec<usize>, Vec<usize>)> {
        let mut mode = 1;
        let mut ret: Vec<(Vec<usize>, Vec<usize>)> = vec![];
        for (idx, token) in self.tokens.iter().enumerate() {
            if token.typ == TokenType::Keyword && token.normalized == "CASE" {
                continue
            } else if skip_ws && token.is_whitespace() {
                continue
            } else if token.typ == TokenType::Keyword && token.normalized == "WHEN" {
                // new arm: start a fresh pair, collect its condition
                ret.push((vec![], vec![]));
                mode = 1;
            } else if token.typ == TokenType::Keyword && token.normalized == "THEN" {
                mode = 2;
            } else if token.typ == TokenType::Keyword && token.normalized == "ELSE" {
                // ELSE has no condition; its pair only collects values
                ret.push((vec![], vec![]));
                mode = 2;
            } else if token.typ == TokenType::Keyword && token.normalized == "END" {
                mode = 0;
            }

            // First condition without preceding WHEN
            if mode > 0 && ret.len() < 1 {
                ret.push((vec![], vec![]));
            }
            // Append token depending of the current mode
            let length = ret.len();
            if mode == 1 {
                ret[length-1].0.push(idx);
            } else if mode == 2 {
                ret[length-1].1.push(idx);
            }
        }
        ret
    }
232
233    //  Whitespaces and punctuations are not included
234    pub fn get_identifiers(&self) -> Vec<usize> {
235        self.tokens.iter().enumerate().filter(|(_, t)| !(t.is_whitespace() || t.value == ",")).map(|(i, _)| i).collect::<Vec<_>>()
236    }
237
    /// Pair `[` … `]` into SquareBrackets groups.
    fn group_brackets(&mut self) {
        group_matching(self, &TokenType::SquareBrackets, &["["], "]");
    }

    /// Pair `(` … `)` into Parenthesis groups.
    fn group_parenthesis(&mut self) {
        group_matching(self, &TokenType::Parenthesis, &["("], ")");
    }

    /// Pair CASE … END into Case groups.
    fn group_case(&mut self) {
        group_matching(self, &TokenType::Case, &["CASE"], "END");
    }

    /// Pair IF … END IF into If groups.
    fn group_if(&mut self) {
        group_matching(self, &TokenType::If, &["IF"], "END IF");
    }

    // FIXME: multiple space
    /// Pair FOR/FOREACH … END LOOP into For groups.
    fn group_for(&mut self) {
        group_matching(self, &TokenType::For, &["FOR", "FOREACH"], "END LOOP");
    }

    /// Pair BEGIN … END into Begin groups.
    fn group_begin(&mut self) {
        group_matching(self, &TokenType::Begin, &["BEGIN"], "END");
    }
262
    /// Group `expr::type` typecasts into a single Identifier.
    fn group_typecasts(&mut self) {

        // the `::` punctuation token is the grouping pivot
        fn matcher(token: &Token) -> bool {
            token.typ == TokenType::Punctuation && token.value == "::"
        }

        // any present neighbour is acceptable on either side
        fn valid(token: Option<&Token>) -> bool {
            token.is_some()
        }

        // group everything from the previous to the next token
        fn post(_tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
            (pidx, nidx)
        }

        group_internal(self, TokenType::Identifier, matcher, valid, valid, post, true, true)
    }
279
    /// Group timezone casts (`... AT TIME ZONE ...`) into an Identifier.
    fn group_tzcasts(&mut self) {

        // the dedicated TZ-cast keyword token is the grouping pivot
        fn matcher(token: &Token) -> bool {
            token.typ == TokenType::KeywordTZCast
        }

        // any present neighbour is acceptable on either side
        fn valid(token: Option<&Token>) -> bool {
            token.is_some()
        }

        fn post(_tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
            (pidx, nidx)
        }

        group_internal(self, TokenType::Identifier, matcher, valid, valid, post, true, true)
    }
297    
298    fn group_typed_literal(&mut self) {
299
300        fn matcher(token: &Token) -> bool {
301            token.typ == TokenType::NameBuiltin ||
302            (token.typ == TokenType::Keyword && token.normalized == "TIMESTAMP")
303        }
304
305        fn match_to_extend(token: &Token) -> bool {
306            token.typ == TokenType::TypedLiteral
307        }
308
309        fn valid_prev(token: Option<&Token>) -> bool {
310            token.is_some()
311        }
312
313        fn valid_next(token: Option<&Token>) -> bool {
314            token.map(|t| t.typ == TokenType::StringSingle).unwrap_or(false)
315        }
316
317        fn valid_final(token: Option<&Token>) -> bool {
318            token.map(|t| t.typ == TokenType::Keyword && match t.normalized.as_str() {
319                "DAY" | "HOUR" | "MINUTE" | "MONTH" | "SECOND" | "YEAR" => true,
320                _ => false}).unwrap_or(false)
321        }
322
323        fn post(_tlist: &mut TokenList, _pidx: usize, tidx: usize, nidx: usize) -> (usize, usize) {
324            (tidx, nidx)
325        }
326
327        group_internal(self, TokenType::TypedLiteral, matcher, valid_prev, valid_next, post, false, false);
328        group_internal(self, TokenType::TypedLiteral, match_to_extend, valid_prev, valid_final, post, true, false);
329
330    }
331
332    fn group_identifier(&mut self) {
333        // for token in self.tokens.iter_mut() {
334        //     if token.is_group() {
335        //         token.children.group_identifier();
336        //     }
337        // }
338        sub_group!(self, group_identifier);
339        let ttypes = vec![TokenType::StringSymbol, TokenType::Name];
340        let mut tidx = self.token_next_by(&ttypes, None, 0);
341        while let Some(idx) = tidx {
342            self.group_tokens(TokenType::Identifier, idx, idx +1, false);
343            tidx = self.token_next_by(&ttypes, None, idx+1);
344        }
345    }
346
347    // group_arrays
348
349    fn group_identifier_list(&mut self) {
350
351        fn matcher(token: &Token) -> bool {
352            token.typ == TokenType::Punctuation && token.value == ","
353        }
354
355        fn valid(token: Option<&Token>) -> bool {
356            let types = T_NUMERICAL.iter()
357                .chain(&T_STRING)
358                .chain(&T_NAME)
359                .chain(&[TokenType::Keyword, TokenType::KeywordOrder, TokenType::Comment, TokenType::Wildcard, 
360                    TokenType::Function, TokenType::Case, TokenType::Identifier, 
361                    TokenType::Comparison, TokenType::IdentifierList, TokenType::Operation])
362                .map(|t| t.clone())
363                .collect::<Vec<_>>();
364        
365            let patterns = (TokenType::Keyword, vec!["NULL", "ROLE"]);
366            return Token::imt(token, &types, Some(&patterns))
367        }
368
369        fn post(_tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
370            (pidx, nidx)
371        }
372
373        group_internal(self, TokenType::IdentifierList, matcher, valid, valid, post, true, true)
374    }
375
    /// Collapse each run of comment tokens (including interior whitespace)
    /// into a single Comment group.
    fn group_comments(&mut self) {
        let types = vec![TokenType::CommentMultiline, TokenType::CommentSingle];
        let mut tidx = self.token_next_by(&types, None, 0);
        while let Some(idx) = tidx {
            // end of the run: first token that is neither comment nor whitespace
            let eidx = self.token_not_matching_fn(|t| {
                Token::imt(Some(t), &types, None) || t.typ == TokenType::Whitespace
            }, idx, self.len(), false);
            if let Some(end) = eidx {
                self.group_tokens(TokenType::Comment, idx, end, false);
            }
            tidx = self.token_next_by(&types, None, idx+1);

        }
    }
391
392    // TODO: add macro
393    fn group_where(&mut self) {
394        sub_group!(self, group_where);
395        let where_open = (TokenType::Keyword, vec!["WHERE"]);
396        let where_close = (TokenType::Keyword, vec!["ORDER BY", "GROUP BY", "LIMIT", "UNION", "UNION ALL", "EXCEPT", "HAVING", "RETURNING", "INTO"]);
397        let mut tidx = self.token_next_by(&vec![], Some(&where_open), 0);
398        while let Some(idx) = tidx {
399            let edix = self.token_next_by(&vec![], Some(&where_close), idx+1);
400            let edix = edix.unwrap_or(self.groupable_tokens(&self.tokens[0]).1);
401            self.group_tokens(TokenType::Where, idx, edix, false);
402            tidx = self.token_next_by(&vec![], Some(&where_open), idx);
403        }
404    }
405
406     fn group_comparison(&mut self) {
407
408        fn matcher(token: &Token) -> bool {
409            token.typ == TokenType::OperatorComparison
410        }
411
412        fn valid(token: Option<&Token>) -> bool {
413            let types = vec![TokenType::Number, TokenType::NumberInteger, TokenType::NumberFloat, 
414                TokenType::String, TokenType::StringSingle, TokenType::StringSymbol,
415                TokenType::Name, TokenType::NamePlaceholder,
416                TokenType::Parenthesis, TokenType::Function, TokenType::Identifier, TokenType::Operation, TokenType::TypedLiteral];
417            let patterns = (TokenType::Parenthesis, vec!["(", ")"]);
418            if Token::imt(token, &types, Some(&patterns)) {
419                true
420            } else if token.map(|t| t.typ == TokenType::Keyword && t.normalized == "NULL").unwrap_or(false) {
421                true
422            } else {
423                false
424            }
425        }
426
427        fn post(_tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
428            (pidx, nidx)
429        }
430
431        group_internal(self, TokenType::Comparison, matcher, valid, valid, post, false, true);
432     }
433
434     fn group_operator(&mut self) {
435
436        fn matcher(token: &Token) -> bool {
437            token.typ == TokenType::Operator || token.typ == TokenType::Wildcard
438        }
439
440        fn valid(token: Option<&Token>) -> bool {
441            let mut types = T_NUMERICAL.iter()
442                .chain(&T_STRING)
443                .chain(&T_NAME)
444                .map(|t| t.clone())
445                .collect::<Vec<_>>();
446            types.extend(vec![TokenType::SquareBrackets, TokenType::Parenthesis, TokenType::Function, 
447                    TokenType::Identifier, TokenType::Operation, TokenType::TypedLiteral]);
448            Token::imt(token, &types, None) || 
449                token.map(|t| t.typ == TokenType::Keyword && (t.value == "CURRENT_DATE" || t.value == "CURRENT_TIME" || t.value == "CURRENT_TIMESTAMP")).unwrap_or(false)
450        }
451
452        fn post(tlist: &mut TokenList, pidx: usize, tidx: usize, nidx: usize) -> (usize, usize) {
453            tlist.tokens[tidx].typ = TokenType::Operator; 
454            (pidx, nidx)
455        }
456
457        group_internal(self, TokenType::Operation, matcher, valid, valid, post, false, true)
458     }
459
     // schema.table
     /// Group dotted names (`schema.table`, `t.col`, `t.*`) into an
     /// Identifier; the part after the dot is included only when it is
     /// name-like (checked in `post`).
     fn group_period(&mut self) {
        fn matcher(token: &Token) -> bool {
            token.typ == TokenType::Punctuation && token.value == "."
        }

        fn valid_prev(token: Option<&Token>) -> bool {
            let ttypes = vec![TokenType::Name, TokenType::StringSymbol, TokenType::SquareBrackets, TokenType::Identifier];
            Token::imt(token, &ttypes, None)
        }

        // the next token is validated in `post` instead
        fn valid_next(_token: Option<&Token>) -> bool {
            true
        }

        // extend the group over the token after the dot only when that
        // token is name-like; otherwise the group ends at the dot itself
        fn post(tlist: &mut TokenList, pidx: usize, tidx: usize, nidx: usize) -> (usize, usize) {
            let ttypes = vec![TokenType::Name, TokenType::StringSymbol, TokenType::Wildcard, TokenType::SquareBrackets, TokenType::Function];
            let next = tlist.token_idx(Some(nidx));
            let valid_next = Token::imt(next, &ttypes, None);
            if valid_next { (pidx, nidx) } else { (pidx, tidx) }
        }

        group_internal(self, TokenType::Identifier, matcher, valid_prev, valid_next, post, true, true);
     }
484
    /// Group `<expr> AS <alias>` into an Identifier.
    fn group_as(&mut self) {

        fn matcher(token: &Token) -> bool {
            token.is_keyword() && token.normalized == "AS"
        }

        // the aliased expression must not be a keyword (NULL excepted)
        fn valid_prev(token: Option<&Token>) -> bool {
            token.map(|t| t.normalized == "NULL" || !t.is_keyword()).unwrap_or(false)
        }

        // `AS` followed by DML/DDL/CTE (e.g. CREATE TABLE t AS SELECT ...)
        // does not introduce an alias
        fn valid_next(token: Option<&Token>) -> bool {
            let ttypes = vec![TokenType::DML, TokenType::DDL, TokenType::CTE];
            !Token::imt(token, &ttypes, None) && token.is_some()
        }

        fn post(_tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
            (pidx, nidx)
        }

        group_internal(self, TokenType::Identifier, matcher, valid_prev, valid_next, post, true, true);
    }
506
    /// Group PL/SQL-style `var := expr` into an Assignment group, extending
    /// the right-hand side up to the next semicolon when one exists.
    fn group_assignment(&mut self) {

        fn matcher(token: &Token) -> bool {
            token.typ == TokenType::Assignment && token.value == ":="
        }

        // neither side of `:=` may be a keyword
        fn valid(token: Option<&Token>) -> bool {
            token.map(|t| t.typ != TokenType::Keyword).unwrap_or(false)
        }

        // stretch the group's end to the terminating semicolon if present
        fn post(tlist: &mut TokenList, pidx: usize, _tidx: usize, nidx: usize) -> (usize, usize) {
            let m_semicolon = (TokenType::Punctuation, vec![";"]);
            let snidx = tlist.token_next_by(&vec![], Some(&m_semicolon), nidx);
            let nidx = snidx.unwrap_or(nidx);
            (pidx, nidx)
        }

        group_internal(self, TokenType::Assignment, matcher, valid, valid, post, true, true);
    }
526
    /// Attach a trailing alias Identifier to the preceding expression
    /// (e.g. `count(*) c`) by extending the expression's group.
    fn group_aliased(&mut self) {
        let ttypes = vec![TokenType::Parenthesis, TokenType::Function, TokenType::Case, TokenType::Identifier, 
            TokenType::Operation, TokenType::Comparison, TokenType::NumberInteger, TokenType::NumberFloat, TokenType::NumberHexadecimal, TokenType::Number];
        let mut tidx = self.token_next_by(&ttypes, None, 0);
        while let Some(idx) = tidx {
            let nidx = self.token_next(idx+1, true);
            let next = self.token_idx(nidx);
            // an Identifier directly after an expression is its alias
            if next.map(|n| n.typ == TokenType::Identifier).unwrap_or(false) {
                self.group_tokens(TokenType::Identifier, idx, nidx.unwrap()+1, true)
            }
            tidx = self.token_next_by(&ttypes, None, idx+1);
        }
    }
540
541    fn group_functions(&mut self) {
542        let mut has_create = false;
543        let mut has_table = false;
544        for tmp_token in &self.tokens {
545            if tmp_token.normalized == "CREATE" {
546                has_create = true;
547            }
548            if tmp_token.normalized == "TABLE" {
549                has_table = true;
550            }
551        }
552        if has_create && has_table {
553            return
554        }
555        let ttypes = vec![TokenType::Name];
556        let mut tidx = self.token_next_by(&ttypes, None, 0);
557        while let Some(idx) = tidx {
558            let nidx = self.token_next(idx+1, true);
559            let next = self.token_idx(nidx);
560            if next.map(|n| n.typ == TokenType::Parenthesis).unwrap_or(false) {
561                self.group_tokens(TokenType::Function, idx, nidx.unwrap()+1, false)
562            }
563            tidx =  self.token_next_by(&ttypes, None, idx+1);
564        }
565    }
566
567    //  Group together Identifier and Asc/Desc token
568    fn group_order(&mut self) {
569        let ttypes = vec![TokenType::KeywordOrder];
570        let mut tidx = self.token_next_by(&ttypes, None, 0);
571        while let Some(idx) = tidx {
572            let pidx = self.token_prev(idx, true);
573            let prev = self.token_idx(pidx);
574            let ttypes = vec![TokenType::Identifier, TokenType::Number, TokenType::NumberInteger, TokenType::NumberFloat, TokenType::NumberHexadecimal];
575            if Token::imt(prev, &ttypes, None) {
576                self.group_tokens(TokenType::Identifier, pidx.unwrap(), idx+1, false);
577                tidx = pidx;
578            }
579            tidx = self.token_next_by(&ttypes, None, tidx.unwrap()+1);
580        }
581    }
582
    /// Fold a comment into the immediately preceding group token so the
    /// comment stays attached to what it annotates.
    fn align_comments(&mut self) {
        let types = vec![TokenType::Comment];
        let mut tidx = self.token_next_by(&types, None, 0);
        while let Some(mut idx) = tidx {
            let pidx = self.token_prev(idx, true);
            let prev = self.token_idx(pidx);
            if prev.map(|p| p.is_group()).unwrap_or(false) {
                // extend the previous group (keeping its own type) over the comment
                let typ = prev.map(|p| p.typ.clone()).unwrap();
                self.group_tokens(typ, pidx.unwrap(), idx+1, true);
                idx = pidx.unwrap();
            }
            tidx = self.token_next_by(&types, None, idx+1);
        }
    }
597
    // insert into table_name values()
    /// Group `VALUES (...), (...)` into a Values group spanning from the
    /// VALUES keyword through the last following top-level Parenthesis.
    fn group_values(&mut self) {
        let values = (TokenType::Keyword, vec!["VALUES"]);
        let mut tidx = self.token_next_by(&vec![], Some(&values), 0);
        let start_idx = tidx;
        let mut end_idx: Option<usize> = None;
        while let Some(idx) = tidx {
            let token = self.token_idx(Some(idx));
            // remember the last parenthesis group seen after VALUES
            if token.map(|t| t.typ == TokenType::Parenthesis).unwrap_or(false) {
               end_idx = tidx 
            }
            tidx = self.token_next(idx+1, true);
        }
        if let Some(e_idx) = end_idx {
            self.group_tokens(TokenType::Values, start_idx.unwrap(), e_idx+1, true);
        }
    }
615
    /// Run every grouping pass in dependency order: comments first, then
    /// paired open/close constructs, then identifier/expression grouping,
    /// and finally the list/values passes that rely on earlier groups.
    fn group(&mut self) {

        self.group_comments();
        // passes built on group_matching (paired open/close tokens)
        self.group_brackets();
        self.group_parenthesis();
        self.group_case();
        self.group_if();
        self.group_for();
        self.group_begin();

        self.group_functions();
        self.group_where();
        self.group_period();
        self.group_identifier();
        self.group_order();
        self.group_typecasts();
        self.group_tzcasts();
        self.group_typed_literal();
        self.group_operator();
        self.group_comparison();
        self.group_as();
        self.group_aliased();
        self.group_assignment();

        self.align_comments();
        self.group_identifier_list();
        self.group_values();
    }
645
646    pub fn get_first_name(&self, idx: Option<usize>, reverse: bool, keywords: bool, real_name: bool) -> Option<&str> {
647        let idx = idx.unwrap_or(0);
648        let tokens = &self.tokens[idx..];
649        let mut ttypes = vec![TokenType::Name, TokenType::Wildcard, TokenType::StringSymbol];
650        if keywords {
651            ttypes.push(TokenType::Keyword)
652        }
653        if reverse {
654            for token in tokens.iter().rev() {
655                if ttypes.iter().find(|typ| **typ == token.typ).is_some() {
656                    return Some(remove_quotes(&token.value))
657                } else if token.typ == TokenType::Identifier || token.typ == TokenType::Function {
658                    return if real_name { token.get_real_name() } else { token.get_name() }
659                }         
660            }
661        }
662        for token in tokens {
663            if ttypes.iter().find(|typ| **typ == token.typ).is_some() {
664                return Some(remove_quotes(&token.value))
665            } else if token.typ == TokenType::Identifier || token.typ == TokenType::Function {
666                return if real_name { token.get_real_name() } else { token.get_name() }
667            }         
668        }
669        None
670    }
671
672}
673
/// Group tokens delimited by an explicit opener/closer pair (brackets,
/// parentheses, CASE/END, ...), recursing into existing groups of other
/// types. `idx` walks the ORIGINAL token positions; `tidx_offset` tracks
/// how many tokens earlier groupings removed, so `idx - tidx_offset` is
/// the current position in the shrinking list.
fn group_matching(tlist: &mut TokenList, typ: &TokenType, open: &[&str], close: &str) {
    // Groups Tokens that have beginning and end.
    let mut opens = vec![];
    let mut tidx_offset = 0;
    let count = tlist.tokens.len();
    let mut idx = 0;
    while idx < count {
        let tidx = idx - tidx_offset;
        let token = &tlist.tokens[tidx];
        if token.is_whitespace() {
            idx += 1;
            continue
        }
        if token.is_group() && token.typ != *typ {
            // recurse into foreign groups (e.g. parentheses inside a CASE)
            let token = &mut tlist.tokens[tidx];
            group_matching(&mut token.children, typ, open, close);
            idx += 1;
            continue
        }
        idx += 1;
        if open.contains(&token.normalized.as_str()) {
            // remember the opener; closers pair LIFO with the last opener
            opens.push(tidx);
        } else if token.normalized == close {
            if opens.len() < 1 {
                // unbalanced closer with no pending opener: ignore it
                continue
            }
            let open_idx = opens[opens.len()-1];
            opens.truncate(opens.len()-1);
            let close_idx = tidx;
            tlist.group_tokens(typ.clone(), open_idx, close_idx+1, false);
            // grouping replaced (close_idx - open_idx + 1) tokens with one
            tidx_offset += close_idx - open_idx;
        }
    }
}
708
// TODO: interface Grouping
/// Generic pairwise grouping driver: scans for a pivot token accepted by
/// `matcher`, checks its previous/next non-whitespace neighbours with
/// `valid_prev`/`valid_next`, lets `post` choose the final index range,
/// and collapses that range into a `group_type` group.
/// `extend` folds into an existing leading group of the same type;
/// `recurse` descends into child token lists first.
fn group_internal(
        tlist: &mut TokenList, 
        group_type: TokenType,
        matcher: fn(&Token) -> bool,
        valid_prev: fn(Option<&Token>) -> bool,
        valid_next: fn(Option<&Token>) -> bool,
        post: fn(tlist: &mut TokenList, pidx: usize, tidx: usize, nidx: usize) -> (usize, usize),
        extend: bool,
        recurse: bool,
    ) {
        // NOTE(review): `tidx_offset` is never modified, so the guard below
        // is currently dead code (kept from the original port).
        let tidx_offset = 0;
        let mut pidx: Option<usize> = None;
        let mut prev_: Option<Token> = None;
        let mut idx = 0;
        while idx < tlist.len() {
            if idx < tidx_offset  {
                idx += 1;
                continue
            }
           
            let token = &mut tlist.tokens[idx];
            if token.is_whitespace() {
                idx += 1;
                continue
            }
            
            if recurse && token.is_group() && token.typ != group_type {
                group_internal(&mut token.children, group_type.clone(), matcher, valid_prev, valid_next, post, extend, recurse);
                // no-op on a reference; borrow ends at the re-index below anyway
                std::mem::drop(token)
            }

            let token = &tlist.tokens[idx];
            if matcher(token) {
                let nidx = tlist.token_next(idx+1, true);
                let next_ = tlist.token_idx(nidx);
                // NOTE(review): when `nidx` is None and `valid_next` accepts
                // None, `to_idx+1` can exceed the list length — confirm the
                // matcher/valid combinations in use cannot hit this.
                if pidx.is_some() && prev_.is_some() && valid_prev(prev_.as_ref()) && valid_next(next_) {
                    let (from_idx, to_idx) = post(tlist, pidx.unwrap(), idx, nidx.unwrap_or(tlist.len()));
                    tlist.group_tokens(group_type.clone(), from_idx, to_idx+1, extend);
                    // the new group becomes "previous"; idx is NOT advanced so
                    // chained pivots (a.b.c, 1 + 2 + 3) keep extending it
                    pidx = Some(from_idx);
                    prev_ = tlist.token_idx(pidx).map(|t| t.clone());
                    continue
                }
            }

            pidx = Some(idx);
            prev_ = Some(token.clone());
            idx += 1;
        }
}
762
763
764#[cfg(test)]
765mod tests {
766    use super::*;
767
    // Smoke test: WHERE grouping with a trailing LIMIT clause closing it.
    #[test]
    fn test_group_where() {
        let sql = "select * from users where id > 10 limit 10;";
        let tokens = tokenize(sql);
        let mut tokens = TokenList::new(tokens);
        tokens.group_where();
    }

    // Smoke test: WHERE clause running to the end of the statement.
    #[test]
    fn test_group_where1() {
        let sql = "select * from users where id > 10;";
        let tokens = tokenize(sql);
        let mut tokens = TokenList::new(tokens);
        tokens.group_where();
    }
785
    // Comparison grouping on a raw token stream: "id > 0" becomes a
    // Comparison group at index 10.
    #[test]
    fn test_group_comparison() {
        let sql = "select * from users where id > 0;";
        let tokens = tokenize(sql);
        let mut token_list = TokenList::new(tokens);
        token_list.group_comparison();
        assert_eq!(token_list.tokens[10].typ, TokenType::Comparison)
    }

    // Comparison grouping after WHERE/identifier passes have already run
    // (smoke test; no assertion on the resulting structure).
    #[test]
    fn test_group_comparison1() {
        let sql = "select * from users where id > 0;";
        let mut token_list = TokenList::from(sql);
        token_list.group_where();
        token_list.group_identifier();
        token_list.group_comparison();
    }
811
812    #[test]
813    fn test_group_fn() {
814        let sql = "select * from users where id > 0;";
815        let mut token_list = TokenList::from(sql);
816        token_list.group();
817        assert_eq!(token_list.tokens[8].typ, TokenType::Where);
818    }
819
820    #[test]
821    fn test_token_prev() {
822        let sql= "select * from ";
823        let token_list = TokenList::from(sql);
824        let t = token_list.token_prev(token_list.len(), true);
825        let t = token_list.token_idx(t).unwrap();
826        assert_eq!(t.value, "from");
827    }
828
829    #[test]
830    fn test_insert_before() {
831        let sql= "select * from ";
832        let mut token_list = TokenList::from(sql);
833        let token = Token::new(TokenType::Whitespace, " ");
834        token_list.insert_before(4, token);
835        let v = Token::new_value(&token_list.tokens);
836        assert_eq!(v, "select *  from ");
837    }
838
839    #[test]
840    fn test_group_period() {
841        let sql = "select * from sch.account";
842        let mut token_list = TokenList::from(sql);
843        token_list.group_period();
844        assert_eq!(token_list.tokens[6].typ, TokenType::Identifier);
845        assert_eq!(token_list.tokens[6].value, "sch.account");
846
847        let sql = "select * from sch.user";
848        let mut token_list = TokenList::from(sql);
849        token_list.group_period();
850        assert_eq!(token_list.tokens[6].typ, TokenType::Identifier);
851        assert_eq!(token_list.tokens[6].value, "sch.user");
852
853        let sql = "select * from sch.user as u";
854        let mut token_list = TokenList::from(sql);
855        token_list.group_period();
856        token_list.group_as();
857        assert_eq!(token_list.tokens[6].typ, TokenType::Identifier);
858        assert_eq!(token_list.tokens[6].value, "sch.user as u");
859    }
860
861    #[test]
862    fn test_group_order() {
863        let sql = "select * from users order by id desc";
864        let mut token_list = TokenList::from(sql);
865        token_list.group();
866        assert_eq!(token_list.tokens[10].typ, TokenType::Identifier);
867        assert_eq!(token_list.tokens[10].value, "id desc");
868    }
869
870    #[test]
871    fn test_get_real_name() {
872        let sql = "select * from test.person as p where ";
873        let mut token_list = TokenList::from(sql);
874        token_list.group();
875        let id = token_list.token_idx(Some(6)).unwrap();
876        let real_name = id.get_real_name();
877        let parent_name = id.get_parent_name();
878        let alias = id.get_alias();
879        assert_eq!(real_name, Some("person"));
880        assert_eq!(parent_name, Some("test"));
881        assert_eq!(alias, Some("p"));
882
883        let sql = "select * from test.person where ";
884        let mut token_list = TokenList::from(sql);
885        token_list.group();
886        let id = token_list.token_idx(Some(6)).unwrap();
887        let real_name = id.get_real_name();
888        let parent_name = id.get_parent_name();
889        let alias = id.get_alias();
890        assert_eq!(real_name, Some("person"));
891        assert_eq!(parent_name, Some("test"));
892        assert_eq!(alias, None);
893
894        let sql = "select * from person where ";
895        let mut token_list = TokenList::from(sql);
896        token_list.group();
897        let id = token_list.token_idx(Some(6)).unwrap();
898        let real_name = id.get_real_name();
899        let parent_name = id.get_parent_name();
900        assert_eq!(real_name, Some("person"));
901        assert_eq!(parent_name, None);
902    }
903
904    #[test]
905    fn test_get_case() {
906        let sql = "case when foo = 1 then 2 when foo = 3 then 4 else 5 end";
907        let token_list = group_tokenlist(sql);
908        // println!("{:?}", token_list);
909        let cases = token_list.tokens[0].children.get_case(false);
910        assert_eq!(cases.len(), 4);
911        // for token in &token_list.tokens[0].children.tokens {
912        //     println!("{:?}", token);
913        // }
914    }
915}