//! sqlparse/filters/others.rs

1use regex::Regex;
2use super::{StmtFilter, TokenListFilter};
3use crate::lexer::{Token, TokenList};
4use crate::tokens::{TokenType};
5
/// Filter that removes comment tokens from a token stream, replacing each
/// comment with either a single space or the comment's own trailing newlines.
pub struct StripCommentsFilter {
    // Matches a run of newlines (plus optional trailing spaces) at the end of
    // a comment's text; capture group 1 holds the newline characters to keep.
    newline_reg: Regex,
}
9
10impl Default for StripCommentsFilter {
11    fn default() -> Self {
12        Self { newline_reg: Regex::new(r"((\r|\n)+) *$").unwrap() }
13    }
14}
15
impl StripCommentsFilter {

    /// Returns the index of the next comment token at or after `start`,
    /// or `None` when no comment remains.
    fn get_next_comment(&self, token_list: &mut TokenList, start: usize) -> Option<usize> {
        let ttypes = vec![TokenType::Comment, TokenType::CommentSingle, TokenType::CommentMultiline];
        token_list.token_next_by(&ttypes, None, start)
    }

    /// Chooses the replacement for a stripped comment: the comment's trailing
    /// newline run when it ends in one, otherwise a single space.
    fn get_insert_token(&self, token: &Token) -> Token {
        let caps = self.newline_reg.captures(&token.value);
        if let Some(caps) = caps {
            if let Some(cap) = caps.get(1).map(|c| c.as_str()) {
                return Token::new(TokenType::Newline, cap)
            } 
        }
        Token::new(TokenType::Whitespace, " ")
    }

    /// Removes every comment at this level of `token_list`, inserting a
    /// whitespace/newline replacement where needed so adjacent tokens do not
    /// fuse together.
    fn process_internal(&self, token_list: &mut TokenList) {
        let mut tidx = self.get_next_comment(token_list, 0);
        while let Some(idx) = tidx {
            let token = token_list.token_idx(Some(idx)).unwrap();
            let pidx = token_list.token_prev(idx, false);
            let ptoken = token_list.token_idx(pidx);
            let nidx = token_list.token_next(idx, false);
            let ntoken = token_list.token_idx(nidx);
            let insert_token = self.get_insert_token(token);
            // Net change in token count at/after `idx`; used to resume the scan.
            let mut step: isize = 0;
            // Remove the comment outright when it sits at a list boundary, next
            // to existing whitespace, or directly inside parentheses; otherwise
            // replace it in place with the whitespace/newline token.
            if ptoken.is_none() || ntoken.is_none() ||
                 ptoken.map(|p| p.is_whitespace()).unwrap_or(false) || ptoken.map(|p| p.typ == TokenType::Punctuation && p.value == "(").unwrap_or(false) ||
                 ntoken.map(|p| p.is_whitespace()).unwrap_or(false) || ntoken.map(|p| p.typ == TokenType::Punctuation && p.value == ")").unwrap_or(false)  {
                    // Keep spacing after the comment unless it follows "(".
                    // The `.unwrap()` is safe: `ptoken.is_some()` was just checked.
                    if ptoken.is_some() && !ptoken.map(|p| p.typ == TokenType::Punctuation && p.value == "(").unwrap() {
                        token_list.insert_after(idx, insert_token, false);
                        step += 1;
                    }
                    token_list.tokens.remove(idx);
                    step -= 1;
            } else {
                token_list.tokens[idx] = insert_token;
            }
            // `step + 1` is 0 or 1 in every path above, so the cast is safe.
            tidx = self.get_next_comment(token_list, idx + (step + 1) as usize);
        }
    }
}
59
60impl TokenListFilter for StripCommentsFilter {
61
62    fn process(&mut self, token_list: &mut TokenList) {
63        for token in token_list.tokens.iter_mut() {
64            if token.is_group() && token.typ != TokenType::Comment {
65                self.process(&mut token.children); 
66                token.update_value();
67            }
68        }
69        self.process_internal(token_list);
70    }
71}
72
73pub struct StripWhitespaceFilter { }
74
75impl StripWhitespaceFilter {
76
77    fn stripws(tokens: &mut Vec<Token>) {
78        StripWhitespaceFilter::stripws_default(tokens);
79        StripWhitespaceFilter::stripws_newline(tokens);
80    }
81
82    fn stripws_default(tokens: &mut Vec<Token>) {
83        let mut last_was_ws = false;
84        let mut is_first_char = true;
85        let n = tokens.len();
86        for (i, token) in tokens.iter_mut().enumerate() {
87            if token.is_whitespace() {
88                token.value = if last_was_ws || is_first_char || i+1 == n { "".to_string() } else { " ".to_string() };
89            }
90            last_was_ws = token.is_whitespace();
91            is_first_char = false;
92        }
93    }
94
95    // remove whitespace after newline
96    fn stripws_newline(tokens: &mut Vec<Token>) {
97        let mut idx = 0;
98        while idx < tokens.len() {
99            let token = &tokens[idx];
100            if token.typ != TokenType::Newline {
101                idx += 1;
102                continue
103            }
104            let next_idx = idx+1;
105            while next_idx < tokens.len() {
106                let token_next = &tokens[next_idx];
107                if !token_next.is_whitespace() {
108                    break
109                }
110                tokens.remove(next_idx);
111            }
112            idx += 1;
113        }
114    }
115
116    fn stripws_parenthesis(token: &mut Token) {
117        if token.typ != TokenType::Parenthesis {
118            return
119        }
120        if token.children.token_idx(Some(1)).map(|t| t.is_whitespace()).unwrap_or(false) {
121            token.children.tokens.remove(1);
122        }
123        let token_len = token.children.len();
124        if token_len> 2 && token.children.token_idx(Some(token_len-2)).map(|t| t.is_whitespace()).unwrap_or(false) {
125            token.children.tokens.remove(token_len-2);
126        }
127    }
128
129}
130
131impl StmtFilter for StripWhitespaceFilter {
132
133    fn process(&self, tokens: &mut Vec<Token>) {
134        for token in tokens.iter_mut() {
135            if token.is_group() {
136                Self::stripws_parenthesis(token);
137                self.process(&mut token.children.tokens);
138                token.update_value();
139            }
140        }
141        Self::stripws(tokens);
142    }
143}
144
145pub struct SpacesAroundOperatorsFilter{}
146
impl SpacesAroundOperatorsFilter {

    /// Walks `token_list` and pads each operator/comparison token with a
    /// space on either side unless whitespace is already adjacent.
    fn process_internal(&mut self, token_list: &mut TokenList) {
        let types = vec![TokenType::Operator, TokenType::OperatorComparison];
        let mut tidx = token_list.token_next_by(&types, None, 0);
        while let Some(mut idx) = tidx {
            // Insert a space after the operator when the following token is
            // not already whitespace.
            let nidx = token_list.token_next(idx+1, false);
            if let Some(token_next) = token_list.token_idx(nidx) {
                if token_next.typ != TokenType::Whitespace {
                    token_list.insert_after(idx, Token::new(TokenType::Whitespace, " "), true);
                } 
            }

            // Insert a space before the operator when the preceding token is
            // not already whitespace; insertion shifts the operator one slot
            // right, which the `idx += 1` accounts for.
            // NOTE(review): idx is bumped even when nothing was inserted —
            // looks harmless since the skipped slot is then whitespace, but
            // confirm against TokenList::token_next_by semantics.
            let pidx = token_list.token_prev(idx, false);
            if let Some(token_prev) = token_list.token_idx(pidx) {
                if token_prev.typ != TokenType::Whitespace {
                    token_list.insert_before(idx, Token::new(TokenType::Whitespace, " "));
                }
                idx += 1;
            }
            
            tidx = token_list.token_next_by(&types, None, idx+1);
        }
    }
}
172
173impl TokenListFilter for SpacesAroundOperatorsFilter {
174
175    fn process(&mut self, token_list: &mut TokenList) {
176        self.process_internal(token_list);
177        for token in token_list.tokens.iter_mut() {
178            if token.is_group() {
179                // let before = token.children.len();
180                self.process(&mut token.children);
181                // println!("before {}, after {}", before, token.children.len());
182                token.update_value();
183            }
184        }
185    }
186}
187
188
/// Filter that removes whitespace tokens appearing directly before a
/// newline, including a group's trailing whitespace child.
pub struct StripBeforeNewline{}
191
impl StripBeforeNewline {

    /// Marks and removes whitespace that sits immediately before a token whose
    /// text starts a new line; at the top level (`level == 0`) leading
    /// whitespace is removed as well. Recurses into groups first.
    fn process_internal(&self, tokens: &mut Vec<Token>, level: usize) {
        // Indexes (into the pre-removal `tokens`) slated for trimming.
        let mut remove_indexes = vec![];
        let mut is_before_white = false;
        // remove leading whitespace
        if level == 0 && tokens.first().map(|t| t.is_whitespace()).unwrap_or(false) {
            remove_indexes.push(0)
        }
        for (i, token) in tokens.iter_mut().enumerate() {      
            if token.is_group() {
                self.process_internal(&mut token.children.tokens, level+1);
            }
            // Previous token (or the previous group's last child) was
            // whitespace and this token starts on a new line: trim it.
            if is_before_white && (token.value.starts_with("\n") || token.value.starts_with("\r")) && i > 0 {
                remove_indexes.push(i-1);
            }
            is_before_white = if token.is_group() {
                // check last token of group is whitespace
                if let Some(t) = token.children.tokens.last() { t.is_whitespace() } else { false }
             } else { token.is_whitespace() };
        }
        // `remove_count` compensates for the leftward shift caused by earlier
        // removals; group entries only lose their last child, so they do not
        // shift the outer vec and the counter stays unchanged for them.
        let mut remove_count = 0;
        remove_indexes.iter().for_each(|idx| {
            let token = &mut tokens[idx-remove_count];
            let l = token.children.len();
            if l > 0 {
                // Group whose trailing child is the offending whitespace.
                token.children.tokens.remove(l-1);
                token.update_value();
            } else { 
                tokens.remove(idx-remove_count);
                remove_count += 1;
            }
        });
    }
}
227
228impl StmtFilter for StripBeforeNewline {
229
230    fn process(&self, tokens: &mut Vec<Token>) {
231        self.process_internal(tokens, 0);
232    }
233
234} 
235
236
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_insert_token() {
        let filter = StripCommentsFilter::default();

        // Comment ending in newlines: replaced by a Newline token carrying
        // exactly those newline characters (trailing spaces dropped).
        let comment = Token::new(TokenType::CommentSingle, "-- comment \n\n ");
        let replacement = filter.get_insert_token(&comment);
        assert_eq!(replacement.typ, TokenType::Newline);
        assert_eq!(replacement.value, "\n\n");

        // Comment without a trailing newline: replaced by a single space.
        let comment = Token::new(TokenType::CommentSingle, "-- comment ");
        let replacement = filter.get_insert_token(&comment);
        assert_eq!(replacement.typ, TokenType::Whitespace);
        assert_eq!(replacement.value, " ");
    }
}
255}