clickhouse_native_client/types/
parser.rs1use super::TypeCode;
9use crate::{
10 Error,
11 Result,
12};
13use std::{
14 cell::RefCell,
15 collections::HashMap,
16};
17
/// Lexical category assigned to each token of a type-name string.
///
/// Variant order is significant: discriminants are implicit and start at
/// `Invalid = 0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    /// Input that does not start any recognized token.
    Invalid = 0,
    /// The `=` sign.
    Assign,
    /// An identifier such as a type name.
    Name,
    /// An integer literal, optionally introduced by `-`.
    Number,
    /// A bare string token; the tokenizer in this file never constructs it,
    /// hence the lint exemption.
    #[allow(dead_code)]
    String,
    /// The `(` character.
    LPar,
    /// The `)` character.
    RPar,
    /// The `,` character.
    Comma,
    /// A literal introduced by a single quote (`'`).
    QuotedString,
    /// End of the input string.
    Eos,
}
34
35#[derive(Debug, Clone)]
38struct Token<'a> {
39 token_type: TokenType,
40 value: &'a str,
41}
42
/// Structural role of a node in a parsed type tree.
///
/// Names without a dedicated variant are classified as [`TypeMeta::Terminal`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeMeta {
    /// `Array(...)`.
    Array,
    /// Assignment marker; the parser in this file never assigns it to a node.
    Assign,
    /// The `Null` type name.
    Null,
    /// `Nullable(...)`.
    Nullable,
    /// A numeric literal argument; the number is stored in `TypeAst::value`.
    Number,
    /// A string literal argument; the text is stored in `TypeAst::value_string`.
    String,
    /// Any name that is not one of the composite keywords; also the default.
    Terminal,
    /// `Tuple(...)`.
    Tuple,
    /// `Enum8(...)` or `Enum16(...)`.
    Enum,
    /// `LowCardinality(...)`.
    LowCardinality,
    /// `SimpleAggregateFunction(...)`.
    SimpleAggregateFunction,
    /// `Map(...)`.
    Map,
}
72
73#[derive(Debug, Clone, PartialEq)]
76pub struct TypeAst {
77 pub meta: TypeMeta,
79 pub code: TypeCode,
81 pub name: String,
83 pub value: i64,
85 pub value_string: String,
87 pub elements: Vec<TypeAst>,
89}
90
91impl Default for TypeAst {
92 fn default() -> Self {
93 Self {
94 meta: TypeMeta::Terminal,
95 code: TypeCode::Void,
96 name: String::new(),
97 value: 0,
98 value_string: String::new(),
99 elements: Vec::new(),
100 }
101 }
102}
103
104pub struct TypeParser<'a> {
106 cur: usize,
108 input: &'a str,
110 open_elements: Vec<*mut TypeAst>,
112 current_type: Option<*mut TypeAst>,
114}
115
116impl<'a> TypeParser<'a> {
117 pub fn new(name: &'a str) -> Self {
120 Self {
121 cur: 0,
122 input: name,
123 open_elements: Vec::new(),
124 current_type: None,
125 }
126 }
127
128 pub fn parse(&mut self, type_ast: &mut TypeAst) -> bool {
131 let type_ptr: *mut TypeAst = type_ast as *mut TypeAst;
137 self.current_type = Some(type_ptr);
138 self.open_elements.push(type_ptr);
139
140 let mut processed_tokens = 0;
141
142 loop {
143 let token = self.next_token();
144
145 match token.token_type {
146 TokenType::QuotedString => {
147 unsafe {
148 let current = self.current_type.unwrap();
149 (*current).meta = TypeMeta::String; if token.value.len() >= 2 {
152 (*current).value_string = token.value
153 [1..token.value.len() - 1]
154 .to_string();
155 } else {
156 (*current).value_string = String::new();
157 }
158 (*current).code = TypeCode::String;
159 }
160 }
161
162 TokenType::Name => unsafe {
163 let current = self.current_type.unwrap();
164 (*current).meta = get_type_meta(token.value);
165 (*current).name = token.value.to_string();
166 (*current).code = get_type_code(token.value);
167 },
168
169 TokenType::Number => unsafe {
170 let current = self.current_type.unwrap();
171 (*current).meta = TypeMeta::Number;
172 (*current).value = token.value.parse::<i64>().unwrap_or(0);
173 },
174
175 TokenType::String => unsafe {
176 let current = self.current_type.unwrap();
177 (*current).meta = TypeMeta::String;
178 (*current).value_string = token.value.to_string();
179 },
180
181 TokenType::LPar => {
182 unsafe {
183 let current = self.current_type.unwrap();
184 (*current).elements.push(TypeAst::default());
185 self.open_elements.push(current);
186 let last_idx = (*current).elements.len() - 1;
188 let elements_ptr = (*current).elements.as_mut_ptr();
189 let new_current = elements_ptr.add(last_idx);
190 self.current_type = Some(new_current);
191 }
192 }
193
194 TokenType::RPar => {
195 self.open_elements.pop();
196 if let Some(&parent) = self.open_elements.last() {
197 self.current_type = Some(parent);
198 }
199 }
200
201 TokenType::Assign | TokenType::Comma => {
202 self.open_elements.pop();
203 if let Some(&parent) = self.open_elements.last() {
204 unsafe {
205 (*parent).elements.push(TypeAst::default());
206 self.open_elements.push(parent);
207 let last_idx = (*parent).elements.len() - 1;
208 let elements_ptr = (*parent).elements.as_mut_ptr();
209 let new_current = elements_ptr.add(last_idx);
210 self.current_type = Some(new_current);
211 }
212 }
213 }
214
215 TokenType::Eos => {
216 if self.open_elements.len() != 1 {
218 return false;
219 }
220
221 if processed_tokens == 0 {
223 return false;
224 }
225
226 return validate_ast(type_ast);
227 }
228
229 TokenType::Invalid => {
230 return false;
231 }
232 }
233
234 processed_tokens += 1;
235 }
236 }
237
238 fn next_token(&mut self) -> Token<'a> {
241 let bytes = self.input.as_bytes();
242
243 while self.cur < bytes.len() {
245 match bytes[self.cur] as char {
246 ' ' | '\n' | '\t' | '\0' => {
247 self.cur += 1;
248 continue;
249 }
250 '=' => {
251 let start = self.cur;
252 self.cur += 1;
253 return Token {
254 token_type: TokenType::Assign,
255 value: &self.input[start..self.cur],
256 };
257 }
258 '(' => {
259 let start = self.cur;
260 self.cur += 1;
261 return Token {
262 token_type: TokenType::LPar,
263 value: &self.input[start..self.cur],
264 };
265 }
266 ')' => {
267 let start = self.cur;
268 self.cur += 1;
269 return Token {
270 token_type: TokenType::RPar,
271 value: &self.input[start..self.cur],
272 };
273 }
274 ',' => {
275 let start = self.cur;
276 self.cur += 1;
277 return Token {
278 token_type: TokenType::Comma,
279 value: &self.input[start..self.cur],
280 };
281 }
282 '\'' => {
283 let start = self.cur;
285 self.cur += 1;
286
287 while self.cur < bytes.len() {
289 if bytes[self.cur] as char == '\'' {
290 self.cur += 1;
291 return Token {
292 token_type: TokenType::QuotedString,
293 value: &self.input[start..self.cur],
294 };
295 }
296 self.cur += 1;
297 }
298
299 return Token {
300 token_type: TokenType::QuotedString,
301 value: &self.input[start..self.cur],
302 };
303 }
304 _ => {
305 let start = self.cur;
306 let ch = bytes[self.cur] as char;
307
308 if ch.is_alphabetic() || ch == '_' {
310 while self.cur < bytes.len() {
311 let c = bytes[self.cur] as char;
312 if !c.is_alphanumeric() && c != '_' {
313 break;
314 }
315 self.cur += 1;
316 }
317 return Token {
318 token_type: TokenType::Name,
319 value: &self.input[start..self.cur],
320 };
321 }
322
323 if ch.is_numeric() || ch == '-' {
325 self.cur += 1;
326 while self.cur < bytes.len() {
327 if !(bytes[self.cur] as char).is_numeric() {
328 break;
329 }
330 self.cur += 1;
331 }
332 return Token {
333 token_type: TokenType::Number,
334 value: &self.input[start..self.cur],
335 };
336 }
337
338 return Token {
339 token_type: TokenType::Invalid,
340 value: "",
341 };
342 }
343 }
344 }
345
346 Token { token_type: TokenType::Eos, value: "" }
347 }
348}
349
350fn get_type_meta(name: &str) -> TypeMeta {
353 match name {
354 "Array" => TypeMeta::Array,
355 "Null" => TypeMeta::Null,
356 "Nullable" => TypeMeta::Nullable,
357 "Tuple" => TypeMeta::Tuple,
358 "Enum8" | "Enum16" => TypeMeta::Enum,
359 "LowCardinality" => TypeMeta::LowCardinality,
360 "SimpleAggregateFunction" => TypeMeta::SimpleAggregateFunction,
361 "Map" => TypeMeta::Map,
362 _ => TypeMeta::Terminal,
363 }
364}
365
366fn get_type_code(name: &str) -> TypeCode {
369 match name {
370 "Void" => TypeCode::Void,
371 "Int8" => TypeCode::Int8,
372 "Int16" => TypeCode::Int16,
373 "Int32" => TypeCode::Int32,
374 "Int64" => TypeCode::Int64,
375 "Bool" | "UInt8" => TypeCode::UInt8,
376 "UInt16" => TypeCode::UInt16,
377 "UInt32" => TypeCode::UInt32,
378 "UInt64" => TypeCode::UInt64,
379 "Float32" => TypeCode::Float32,
380 "Float64" => TypeCode::Float64,
381 "String" => TypeCode::String,
382 "FixedString" => TypeCode::FixedString,
383 "DateTime" => TypeCode::DateTime,
384 "DateTime64" => TypeCode::DateTime64,
385 "Date" => TypeCode::Date,
386 "Date32" => TypeCode::Date32,
387 "Array" => TypeCode::Array,
388 "Nullable" => TypeCode::Nullable,
389 "Tuple" => TypeCode::Tuple,
390 "Enum8" => TypeCode::Enum8,
391 "Enum16" => TypeCode::Enum16,
392 "UUID" => TypeCode::UUID,
393 "IPv4" => TypeCode::IPv4,
394 "IPv6" => TypeCode::IPv6,
395 "Int128" => TypeCode::Int128,
396 "UInt128" => TypeCode::UInt128,
397 "Decimal" => TypeCode::Decimal,
398 "Decimal32" => TypeCode::Decimal32,
399 "Decimal64" => TypeCode::Decimal64,
400 "Decimal128" => TypeCode::Decimal128,
401 "LowCardinality" => TypeCode::LowCardinality,
402 "Map" => TypeCode::Map,
403 "Point" => TypeCode::Point,
404 "Ring" => TypeCode::Ring,
405 "Polygon" => TypeCode::Polygon,
406 "MultiPolygon" => TypeCode::MultiPolygon,
407 _ => TypeCode::Void,
408 }
409}
410
411fn validate_ast(ast: &TypeAst) -> bool {
414 if ast.meta == TypeMeta::Terminal
416 && ast.code == TypeCode::Void
417 && !ast.name.eq_ignore_ascii_case("void")
418 && !ast.name.is_empty()
419 {
420 return false;
421 }
422
423 true
424}
425
426thread_local! {
431 static TYPE_CACHE: RefCell<HashMap<String, TypeAst>> =
432 RefCell::new(HashMap::new());
433}
434
435pub fn parse_type_name(type_name: &str) -> Result<TypeAst> {
438 TYPE_CACHE.with(|cache| {
439 if let Some(ast) = cache.borrow().get(type_name) {
441 return Ok(ast.clone());
442 }
443
444 let mut ast = TypeAst::default();
446 let mut parser = TypeParser::new(type_name);
447
448 if !parser.parse(&mut ast) {
449 return Err(Error::Protocol(format!(
450 "Failed to parse type: {}",
451 type_name
452 )));
453 }
454
455 cache.borrow_mut().insert(type_name.to_string(), ast.clone());
457 Ok(ast)
458 })
459}
460
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Parses `name`, panicking if the parser rejects it.
    fn parsed(name: &str) -> TypeAst {
        parse_type_name(name).unwrap()
    }

    #[test]
    fn test_simple_types() {
        let int32 = parsed("Int32");
        assert_eq!(int32.meta, TypeMeta::Terminal);
        assert_eq!(int32.code, TypeCode::Int32);
        assert_eq!(int32.name, "Int32");
    }

    #[test]
    fn test_array_type() {
        let array = parsed("Array(String)");
        assert_eq!(array.meta, TypeMeta::Array);
        assert_eq!(array.code, TypeCode::Array);
        assert_eq!(array.elements.len(), 1);
        assert_eq!(array.elements[0].code, TypeCode::String);
    }

    #[test]
    fn test_nullable_type() {
        let nullable = parsed("Nullable(UInt64)");
        assert_eq!(nullable.meta, TypeMeta::Nullable);
        assert_eq!(nullable.elements.len(), 1);
        assert_eq!(nullable.elements[0].code, TypeCode::UInt64);
    }

    #[test]
    fn test_nested_types() {
        let outer = parsed("Array(Nullable(String))");
        assert_eq!(outer.meta, TypeMeta::Array);

        let inner = &outer.elements[0];
        assert_eq!(inner.meta, TypeMeta::Nullable);
        assert_eq!(inner.elements[0].code, TypeCode::String);
    }

    #[test]
    fn test_fixed_string() {
        let fixed = parsed("FixedString(10)");
        assert_eq!(fixed.meta, TypeMeta::Terminal);
        assert_eq!(fixed.code, TypeCode::FixedString);
        assert_eq!(fixed.elements.len(), 1);

        let length = &fixed.elements[0];
        assert_eq!(length.meta, TypeMeta::Number);
        assert_eq!(length.value, 10);
    }

    #[test]
    fn test_enum8() {
        let enum8 = parsed("Enum8('red' = 1, 'green' = 2)");
        assert_eq!(enum8.meta, TypeMeta::Enum);
        assert_eq!(enum8.code, TypeCode::Enum8);
        // Two name/value pairs, stored flat as four sibling elements.
        assert_eq!(enum8.elements.len(), 4);
    }

    #[test]
    fn test_caching() {
        let first = parsed("String");
        let second = parsed("String");
        assert_eq!(first, second);

        // A successful parse leaves an entry in this thread's cache.
        let is_cached = TYPE_CACHE.with(|cache| cache.borrow().contains_key("String"));
        assert!(is_cached);
    }
}