reifydb_routine/procedure/rql/
tokenize.rs1use std::sync::LazyLock;
5
6use bumpalo::Bump;
7use reifydb_core::value::column::{ColumnWithName, columns::Columns};
8use reifydb_rql::token::{token::TokenKind, tokenize};
9use reifydb_type::value::r#type::Type;
10
11use crate::{
12 procedure::rql::extract_query,
13 routine::{Routine, RoutineInfo, context::ProcedureContext, error::RoutineError},
14};
15
16static INFO: LazyLock<RoutineInfo> = LazyLock::new(|| RoutineInfo::new("rql::tokenize"));
17
/// Routine that tokenizes an RQL query string and reports the resulting
/// tokens as rows. The type carries no state; it is a unit struct.
pub struct RqlTokenize;

impl RqlTokenize {
    /// Creates the routine handle.
    pub fn new() -> Self {
        RqlTokenize
    }
}

impl Default for RqlTokenize {
    /// Equivalent to [`RqlTokenize::new`].
    fn default() -> Self {
        RqlTokenize::new()
    }
}
31
32impl<'a, 'tx> Routine<ProcedureContext<'a, 'tx>> for RqlTokenize {
33 fn info(&self) -> &RoutineInfo {
34 &INFO
35 }
36
37 fn return_type(&self, _input_types: &[Type]) -> Type {
38 Type::Any
39 }
40
41 fn attaches_row_metadata(&self) -> bool {
42 false
43 }
44
45 fn execute(&self, ctx: &mut ProcedureContext<'a, 'tx>, _args: &Columns) -> Result<Columns, RoutineError> {
46 let query = extract_query(ctx.params, "rql::tokenize")?;
47
48 let bump = Bump::new();
49 let tokens = tokenize(&bump, query.as_str())?;
50
51 let mut idx_col: Vec<i32> = Vec::with_capacity(tokens.len());
52 let mut line_col: Vec<i32> = Vec::with_capacity(tokens.len());
53 let mut column_col: Vec<i32> = Vec::with_capacity(tokens.len());
54 let mut kind_col: Vec<String> = Vec::with_capacity(tokens.len());
55 let mut value_col: Vec<String> = Vec::with_capacity(tokens.len());
56
57 for (i, token) in tokens.iter().enumerate() {
58 let (kind, value) = describe_token(&token.kind, token.value());
59 idx_col.push(i as i32);
60 line_col.push(token.fragment.line().0 as i32);
61 column_col.push(token.fragment.column().0 as i32);
62 kind_col.push(kind);
63 value_col.push(value);
64 }
65
66 Ok(Columns::new(vec![
67 ColumnWithName::int4("idx", idx_col),
68 ColumnWithName::int4("line", line_col),
69 ColumnWithName::int4("column", column_col),
70 ColumnWithName::utf8("kind", kind_col),
71 ColumnWithName::utf8("value", value_col),
72 ]))
73 }
74}
75
76fn describe_token(kind: &TokenKind, text: &str) -> (String, String) {
77 match kind {
78 TokenKind::EOF => ("EOF".to_string(), String::new()),
79 TokenKind::Identifier => ("Identifier".to_string(), text.to_string()),
80 TokenKind::Keyword(kw) => ("Keyword".to_string(), format!("{:?}", kw)),
81 TokenKind::Literal(lit) => ("Literal".to_string(), format!("{:?}", lit)),
82 TokenKind::Operator(op) => ("Operator".to_string(), format!("{:?}", op)),
83 TokenKind::Variable => ("Variable".to_string(), text.to_string()),
84 TokenKind::Separator(sep) => ("Separator".to_string(), format!("{:?}", sep)),
85 TokenKind::SystemColumn => ("SystemColumn".to_string(), text.to_string()),
86 }
87}