reifydb_engine/function/text/
substring.rs1use reifydb_core::value::{column::ColumnData, container::Utf8Container};
5
6use crate::function::{ScalarFunction, ScalarFunctionContext};
7
/// Scalar function that extracts a substring from a text column.
///
/// The type carries no state; all inputs arrive through the function context
/// passed to its `ScalarFunction` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct TextSubstring;

impl TextSubstring {
    /// Creates a new `TextSubstring` function instance.
    ///
    /// Equivalent to `TextSubstring::default()`.
    pub fn new() -> Self {
        Self
    }
}
15
16impl ScalarFunction for TextSubstring {
17 fn scalar(&self, ctx: ScalarFunctionContext) -> crate::Result<ColumnData> {
18 let columns = ctx.columns;
19 let row_count = ctx.row_count;
20
21 if columns.len() < 3 {
22 return Ok(ColumnData::utf8(Vec::<String>::new()));
23 }
24
25 let text_column = columns.get(0).unwrap();
26 let start_column = columns.get(1).unwrap();
27 let length_column = columns.get(2).unwrap();
28
29 match (text_column.data(), start_column.data(), length_column.data()) {
30 (
31 ColumnData::Utf8 {
32 container: text_container,
33 max_bytes,
34 },
35 ColumnData::Int4(start_container),
36 ColumnData::Int4(length_container),
37 ) => {
38 let mut result_data = Vec::with_capacity(text_container.data().len());
39 let mut result_bitvec = Vec::with_capacity(row_count);
40
41 for i in 0..row_count {
42 if text_container.is_defined(i)
43 && start_container.is_defined(i) && length_container.is_defined(i)
44 {
45 let original_str = &text_container[i];
46 let start_pos = start_container.get(i).copied().unwrap_or(0);
47 let length = length_container.get(i).copied().unwrap_or(0);
48
49 let start_idx = if start_pos < 0 {
51 0
52 } else {
53 start_pos as usize
54 };
55 let length_usize = if length < 0 {
56 0
57 } else {
58 length as usize
59 };
60
61 let substring = if start_idx >= original_str.len() {
62 String::new()
64 } else {
65 let chars: Vec<char> = original_str.chars().collect();
67 let end_idx = (start_idx + length_usize).min(chars.len());
68
69 if start_idx < chars.len() {
70 chars[start_idx..end_idx].iter().collect()
71 } else {
72 String::new()
73 }
74 };
75
76 result_data.push(substring);
77 result_bitvec.push(true);
78 } else {
79 result_data.push(String::new());
80 result_bitvec.push(false);
81 }
82 }
83
84 Ok(ColumnData::Utf8 {
85 container: Utf8Container::new(result_data, result_bitvec.into()),
86 max_bytes: *max_bytes,
87 })
88 }
89 (
91 ColumnData::Utf8 {
92 container: text_container,
93 max_bytes,
94 },
95 start_data,
96 length_data,
97 ) => {
98 let mut result_data = Vec::with_capacity(text_container.data().len());
99 let mut result_bitvec = Vec::with_capacity(row_count);
100
101 for i in 0..row_count {
102 if text_container.is_defined(i) {
103 let original_str = &text_container[i];
104
105 let start_pos = match start_data {
107 ColumnData::Int1(container) => {
108 container.get(i).map(|&v| v as i32).unwrap_or(0)
109 }
110 ColumnData::Int2(container) => {
111 container.get(i).map(|&v| v as i32).unwrap_or(0)
112 }
113 ColumnData::Int4(container) => {
114 container.get(i).copied().unwrap_or(0)
115 }
116 ColumnData::Int8(container) => {
117 container.get(i).map(|&v| v as i32).unwrap_or(0)
118 }
119 _ => 0,
120 };
121
122 let length = match length_data {
124 ColumnData::Int1(container) => {
125 container.get(i).map(|&v| v as i32).unwrap_or(0)
126 }
127 ColumnData::Int2(container) => {
128 container.get(i).map(|&v| v as i32).unwrap_or(0)
129 }
130 ColumnData::Int4(container) => {
131 container.get(i).copied().unwrap_or(0)
132 }
133 ColumnData::Int8(container) => {
134 container.get(i).map(|&v| v as i32).unwrap_or(0)
135 }
136 _ => 0,
137 };
138
139 let start_idx = if start_pos < 0 {
141 0
142 } else {
143 start_pos as usize
144 };
145 let length_usize = if length < 0 {
146 0
147 } else {
148 length as usize
149 };
150
151 let substring = if start_idx >= original_str.len() {
152 String::new()
154 } else {
155 let chars: Vec<char> = original_str.chars().collect();
157 let end_idx = (start_idx + length_usize).min(chars.len());
158
159 if start_idx < chars.len() {
160 chars[start_idx..end_idx].iter().collect()
161 } else {
162 String::new()
163 }
164 };
165
166 result_data.push(substring);
167 result_bitvec.push(true);
168 } else {
169 result_data.push(String::new());
170 result_bitvec.push(false);
171 }
172 }
173
174 Ok(ColumnData::Utf8 {
175 container: Utf8Container::new(result_data, result_bitvec.into()),
176 max_bytes: *max_bytes,
177 })
178 }
179 _ => unimplemented!("TextSubstring requires text, start position, and length parameters"),
180 }
181 }
182}