Skip to main content

reifydb_routine/procedure/rql/
tokenize.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::sync::LazyLock;
5
6use bumpalo::Bump;
7use reifydb_core::value::column::{ColumnWithName, columns::Columns};
8use reifydb_rql::token::{token::TokenKind, tokenize};
9use reifydb_type::value::r#type::Type;
10
11use crate::{
12	procedure::rql::extract_query,
13	routine::{Routine, RoutineInfo, context::ProcedureContext, error::RoutineError},
14};
15
/// Descriptor for this routine, created from the identifier string
/// `"rql::tokenize"`. `LazyLock` defers construction until the first access;
/// `RqlTokenize::info` hands out a reference to it.
static INFO: LazyLock<RoutineInfo> = LazyLock::new(|| RoutineInfo::new("rql::tokenize"));
17
/// Stateless routine handle for `rql::tokenize`.
///
/// The type carries no data, so every instance is interchangeable; it exists
/// only to hang the routine's trait implementation on.
#[derive(Debug, Clone, Copy, Default)]
pub struct RqlTokenize;

impl RqlTokenize {
	/// Creates the routine handle. Equivalent to [`RqlTokenize::default`].
	pub fn new() -> Self {
		Self
	}
}
31
32impl<'a, 'tx> Routine<ProcedureContext<'a, 'tx>> for RqlTokenize {
33	fn info(&self) -> &RoutineInfo {
34		&INFO
35	}
36
37	fn return_type(&self, _input_types: &[Type]) -> Type {
38		Type::Any
39	}
40
41	fn attaches_row_metadata(&self) -> bool {
42		false
43	}
44
45	fn execute(&self, ctx: &mut ProcedureContext<'a, 'tx>, _args: &Columns) -> Result<Columns, RoutineError> {
46		let query = extract_query(ctx.params, "rql::tokenize")?;
47
48		let bump = Bump::new();
49		let tokens = tokenize(&bump, query.as_str())?;
50
51		let mut idx_col: Vec<i32> = Vec::with_capacity(tokens.len());
52		let mut line_col: Vec<i32> = Vec::with_capacity(tokens.len());
53		let mut column_col: Vec<i32> = Vec::with_capacity(tokens.len());
54		let mut kind_col: Vec<String> = Vec::with_capacity(tokens.len());
55		let mut value_col: Vec<String> = Vec::with_capacity(tokens.len());
56
57		for (i, token) in tokens.iter().enumerate() {
58			let (kind, value) = describe_token(&token.kind, token.value());
59			idx_col.push(i as i32);
60			line_col.push(token.fragment.line().0 as i32);
61			column_col.push(token.fragment.column().0 as i32);
62			kind_col.push(kind);
63			value_col.push(value);
64		}
65
66		Ok(Columns::new(vec![
67			ColumnWithName::int4("idx", idx_col),
68			ColumnWithName::int4("line", line_col),
69			ColumnWithName::int4("column", column_col),
70			ColumnWithName::utf8("kind", kind_col),
71			ColumnWithName::utf8("value", value_col),
72		]))
73	}
74}
75
76fn describe_token(kind: &TokenKind, text: &str) -> (String, String) {
77	match kind {
78		TokenKind::EOF => ("EOF".to_string(), String::new()),
79		TokenKind::Identifier => ("Identifier".to_string(), text.to_string()),
80		TokenKind::Keyword(kw) => ("Keyword".to_string(), format!("{:?}", kw)),
81		TokenKind::Literal(lit) => ("Literal".to_string(), format!("{:?}", lit)),
82		TokenKind::Operator(op) => ("Operator".to_string(), format!("{:?}", op)),
83		TokenKind::Variable => ("Variable".to_string(), text.to_string()),
84		TokenKind::Separator(sep) => ("Separator".to_string(), format!("{:?}", sep)),
85		TokenKind::SystemColumn => ("SystemColumn".to_string(), text.to_string()),
86	}
87}