reifydb_engine/function/text/
substring.rs

// Copyright (c) reifydb.com 2025
// This file is licensed under the AGPL-3.0-or-later, see license.md file
3
4use reifydb_core::value::{column::ColumnData, container::Utf8Container};
5
6use crate::function::{ScalarFunction, ScalarFunctionContext};
7
/// Scalar function that extracts a substring from a text column, given a
/// start-position column and a length column.
///
/// The type carries no state; all inputs arrive through the
/// `ScalarFunctionContext` passed to `ScalarFunction::scalar`.
#[derive(Debug, Default)]
pub struct TextSubstring;

impl TextSubstring {
	/// Creates the function object. Equivalent to `TextSubstring::default()`.
	pub fn new() -> Self {
		Self
	}
}
15
16impl ScalarFunction for TextSubstring {
17	fn scalar(&self, ctx: ScalarFunctionContext) -> crate::Result<ColumnData> {
18		let columns = ctx.columns;
19		let row_count = ctx.row_count;
20
21		if columns.len() < 3 {
22			return Ok(ColumnData::utf8(Vec::<String>::new()));
23		}
24
25		let text_column = columns.get(0).unwrap();
26		let start_column = columns.get(1).unwrap();
27		let length_column = columns.get(2).unwrap();
28
29		match (text_column.data(), start_column.data(), length_column.data()) {
30			(
31				ColumnData::Utf8 {
32					container: text_container,
33					max_bytes,
34				},
35				ColumnData::Int4(start_container),
36				ColumnData::Int4(length_container),
37			) => {
38				let mut result_data = Vec::with_capacity(text_container.data().len());
39				let mut result_bitvec = Vec::with_capacity(row_count);
40
41				for i in 0..row_count {
42					if text_container.is_defined(i)
43						&& start_container.is_defined(i) && length_container.is_defined(i)
44					{
45						let original_str = &text_container[i];
46						let start_pos = start_container.get(i).copied().unwrap_or(0);
47						let length = length_container.get(i).copied().unwrap_or(0);
48
49						// Convert to 0-based indexing (RQL uses 0-based)
50						let start_idx = if start_pos < 0 {
51							0
52						} else {
53							start_pos as usize
54						};
55						let length_usize = if length < 0 {
56							0
57						} else {
58							length as usize
59						};
60
61						let substring = if start_idx >= original_str.len() {
62							// Start position is beyond string length
63							String::new()
64						} else {
65							// Get the substring with proper Unicode handling
66							let chars: Vec<char> = original_str.chars().collect();
67							let end_idx = (start_idx + length_usize).min(chars.len());
68
69							if start_idx < chars.len() {
70								chars[start_idx..end_idx].iter().collect()
71							} else {
72								String::new()
73							}
74						};
75
76						result_data.push(substring);
77						result_bitvec.push(true);
78					} else {
79						result_data.push(String::new());
80						result_bitvec.push(false);
81					}
82				}
83
84				Ok(ColumnData::Utf8 {
85					container: Utf8Container::new(result_data, result_bitvec.into()),
86					max_bytes: *max_bytes,
87				})
88			}
89			// Handle cases where start/length are different integer types
90			(
91				ColumnData::Utf8 {
92					container: text_container,
93					max_bytes,
94				},
95				start_data,
96				length_data,
97			) => {
98				let mut result_data = Vec::with_capacity(text_container.data().len());
99				let mut result_bitvec = Vec::with_capacity(row_count);
100
101				for i in 0..row_count {
102					if text_container.is_defined(i) {
103						let original_str = &text_container[i];
104
105						// Extract start position from various integer types
106						let start_pos = match start_data {
107							ColumnData::Int1(container) => {
108								container.get(i).map(|&v| v as i32).unwrap_or(0)
109							}
110							ColumnData::Int2(container) => {
111								container.get(i).map(|&v| v as i32).unwrap_or(0)
112							}
113							ColumnData::Int4(container) => {
114								container.get(i).copied().unwrap_or(0)
115							}
116							ColumnData::Int8(container) => {
117								container.get(i).map(|&v| v as i32).unwrap_or(0)
118							}
119							_ => 0,
120						};
121
122						// Extract length from various integer types
123						let length = match length_data {
124							ColumnData::Int1(container) => {
125								container.get(i).map(|&v| v as i32).unwrap_or(0)
126							}
127							ColumnData::Int2(container) => {
128								container.get(i).map(|&v| v as i32).unwrap_or(0)
129							}
130							ColumnData::Int4(container) => {
131								container.get(i).copied().unwrap_or(0)
132							}
133							ColumnData::Int8(container) => {
134								container.get(i).map(|&v| v as i32).unwrap_or(0)
135							}
136							_ => 0,
137						};
138
139						// Convert to 0-based indexing
140						let start_idx = if start_pos < 0 {
141							0
142						} else {
143							start_pos as usize
144						};
145						let length_usize = if length < 0 {
146							0
147						} else {
148							length as usize
149						};
150
151						let substring = if start_idx >= original_str.len() {
152							// Start position is beyond string length
153							String::new()
154						} else {
155							// Get the substring with proper Unicode handling
156							let chars: Vec<char> = original_str.chars().collect();
157							let end_idx = (start_idx + length_usize).min(chars.len());
158
159							if start_idx < chars.len() {
160								chars[start_idx..end_idx].iter().collect()
161							} else {
162								String::new()
163							}
164						};
165
166						result_data.push(substring);
167						result_bitvec.push(true);
168					} else {
169						result_data.push(String::new());
170						result_bitvec.push(false);
171					}
172				}
173
174				Ok(ColumnData::Utf8 {
175					container: Utf8Container::new(result_data, result_bitvec.into()),
176					max_bytes: *max_bytes,
177				})
178			}
179			_ => unimplemented!("TextSubstring requires text, start position, and length parameters"),
180		}
181	}
182}