1use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use vortex_buffer::BufferString;
8use vortex_dtype::Nullability::NonNullable;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14#[derive(Debug, Hash)]
19pub struct Utf8Scalar<'a> {
20 dtype: &'a DType,
21 value: Option<Arc<BufferString>>,
22}
23
24impl Display for Utf8Scalar<'_> {
25 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26 match &self.value {
27 None => write!(f, "null"),
28 Some(v) => write!(f, "\"{}\"", v.as_str()),
29 }
30 }
31}
32
33impl PartialEq for Utf8Scalar<'_> {
34 fn eq(&self, other: &Self) -> bool {
35 self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
36 }
37}
38
39impl Eq for Utf8Scalar<'_> {}
40
41impl PartialOrd for Utf8Scalar<'_> {
42 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
43 Some(self.value.cmp(&other.value))
44 }
45}
46
47impl Ord for Utf8Scalar<'_> {
48 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
49 self.value.cmp(&other.value)
50 }
51}
52
53impl<'a> Utf8Scalar<'a> {
54 pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
60 if !matches!(dtype, DType::Utf8(..)) {
61 vortex_bail!("Can only construct utf8 scalar from utf8 dtype, found {dtype}")
62 }
63 Ok(Self {
64 dtype,
65 value: value.as_buffer_string()?,
66 })
67 }
68
69 #[inline]
71 pub fn dtype(&self) -> &'a DType {
72 self.dtype
73 }
74
75 pub fn value(&self) -> Option<BufferString> {
77 self.value.as_ref().map(|v| v.as_ref().clone())
78 }
79
80 pub fn upper_bound(self, max_length: usize) -> Option<Self> {
84 if let Some(value) = self.value {
85 if value.len() > max_length {
86 let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
87 .rfind(|p| value.is_char_boundary(*p))
88 .vortex_expect("Failed to find utf8 character boundary");
89
90 let utf8_mut = value
91 .get(..utf8_split_pos)
92 .vortex_expect("Slicing with existing index");
93
94 for (idx, original_char) in utf8_mut.char_indices().rev() {
95 let original_len = original_char.len_utf8();
96 if let Some(next_char) = char::from_u32(original_char as u32 + 1) {
97 if next_char.len_utf8() == original_len {
99 let sliced = value.inner().slice(0..idx + original_len);
100 drop(value);
101 let mut result = sliced.into_mut();
102 next_char.encode_utf8(&mut result[idx..]);
103 return Some(Self {
104 dtype: self.dtype,
105 value: Some(Arc::new(unsafe {
106 BufferString::new_unchecked(result.freeze())
107 })),
108 });
109 }
110 }
111 }
112 None
113 } else {
114 Some(Self {
115 dtype: self.dtype,
116 value: Some(value),
117 })
118 }
119 } else {
120 Some(self)
121 }
122 }
123
124 pub fn lower_bound(self, max_length: usize) -> Self {
126 if let Some(value) = self.value {
127 if value.len() > max_length {
128 let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
130 .rfind(|p| value.is_char_boundary(*p))
131 .vortex_expect("Failed to find utf8 character boundary");
132
133 Self {
134 dtype: self.dtype,
135 value: Some(Arc::new(unsafe {
136 BufferString::new_unchecked(value.inner().slice(0..utf8_split_pos))
137 })),
138 }
139 } else {
140 Self {
141 dtype: self.dtype,
142 value: Some(value),
143 }
144 }
145 } else {
146 self
147 }
148 }
149
150 pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
151 if !matches!(dtype, DType::Utf8(..)) {
152 vortex_bail!("Can't cast utf8 to {}", dtype)
153 }
154 Ok(Scalar::new(
155 dtype.clone(),
156 ScalarValue(InnerScalarValue::BufferString(
157 self.value
158 .as_ref()
159 .vortex_expect("nullness handled in Scalar::cast")
160 .clone(),
161 )),
162 ))
163 }
164
165 pub fn len(&self) -> Option<usize> {
167 self.value.as_ref().map(|v| v.len())
168 }
169
170 pub fn is_empty(&self) -> Option<bool> {
172 self.value.as_ref().map(|v| v.is_empty())
173 }
174
175 pub fn into_value(self) -> ScalarValue {
177 ScalarValue(
178 self.value
179 .map(InnerScalarValue::BufferString)
180 .unwrap_or_else(|| InnerScalarValue::Null),
181 )
182 }
183}
184
185impl Scalar {
186 pub fn utf8<B>(str: B, nullability: Nullability) -> Self
192 where
193 B: Into<BufferString>,
194 {
195 Self::try_utf8(str, nullability).unwrap()
196 }
197
198 pub fn try_utf8<B>(
204 str: B,
205 nullability: Nullability,
206 ) -> Result<Self, <B as TryInto<BufferString>>::Error>
207 where
208 B: TryInto<BufferString>,
209 {
210 Ok(Self {
211 dtype: DType::Utf8(nullability),
212 value: ScalarValue(InnerScalarValue::BufferString(Arc::new(str.try_into()?))),
213 })
214 }
215}
216
217impl<'a> TryFrom<&'a Scalar> for Utf8Scalar<'a> {
218 type Error = VortexError;
219
220 fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
221 if !matches!(value.dtype(), DType::Utf8(_)) {
222 vortex_bail!("Expected utf8 scalar, found {}", value.dtype())
223 }
224 Ok(Self {
225 dtype: value.dtype(),
226 value: value.value.as_buffer_string()?,
227 })
228 }
229}
230
231impl<'a> TryFrom<&'a Scalar> for String {
232 type Error = VortexError;
233
234 fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
235 Ok(BufferString::try_from(value)?.to_string())
236 }
237}
238
239impl From<&str> for Scalar {
240 fn from(value: &str) -> Self {
241 Self {
242 dtype: DType::Utf8(NonNullable),
243 value: ScalarValue(InnerScalarValue::BufferString(Arc::new(
244 value.to_string().into(),
245 ))),
246 }
247 }
248}
249
250impl From<String> for Scalar {
251 fn from(value: String) -> Self {
252 Self {
253 dtype: DType::Utf8(NonNullable),
254 value: ScalarValue(InnerScalarValue::BufferString(Arc::new(value.into()))),
255 }
256 }
257}
258
259impl From<BufferString> for Scalar {
260 fn from(value: BufferString) -> Self {
261 Self {
262 dtype: DType::Utf8(NonNullable),
263 value: ScalarValue(InnerScalarValue::BufferString(Arc::new(value))),
264 }
265 }
266}
267
268impl From<Arc<BufferString>> for Scalar {
269 fn from(value: Arc<BufferString>) -> Self {
270 Self {
271 dtype: DType::Utf8(NonNullable),
272 value: ScalarValue(InnerScalarValue::BufferString(value)),
273 }
274 }
275}
276
277impl<'a> TryFrom<&'a Scalar> for BufferString {
278 type Error = VortexError;
279
280 fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
281 <Option<BufferString>>::try_from(scalar)?
282 .ok_or_else(|| vortex_err!("Can't extract present value from null scalar"))
283 }
284}
285
286impl TryFrom<Scalar> for BufferString {
287 type Error = VortexError;
288
289 fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
290 Self::try_from(&scalar)
291 }
292}
293
294impl<'a> TryFrom<&'a Scalar> for Option<BufferString> {
295 type Error = VortexError;
296
297 fn try_from(scalar: &'a Scalar) -> Result<Self, Self::Error> {
298 Ok(Utf8Scalar::try_from(scalar)?.value())
299 }
300}
301
302impl TryFrom<Scalar> for Option<BufferString> {
303 type Error = VortexError;
304
305 fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
306 Self::try_from(&scalar)
307 }
308}
309
#[cfg(test)]
mod tests {
    use vortex_dtype::Nullability;
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{Scalar, Utf8Scalar};

    /// Builds a non-nullable UTF-8 scalar from a string literal.
    fn non_null(text: &'static str) -> Scalar {
        Scalar::utf8(text, Nullability::NonNullable)
    }

    /// A cut that lands inside a multi-byte character backs up to the previous boundary.
    #[test]
    fn lower_bound() {
        let input = non_null("snowman⛄️snowman");
        let want = non_null("snowman");

        let got = Utf8Scalar::try_from(&input).vortex_unwrap().lower_bound(9);

        assert_eq!(got, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    /// The last character of the truncated prefix is bumped to its successor.
    #[test]
    fn upper_bound() {
        let input = non_null("char🪩");
        let want = non_null("chas");

        let got = Utf8Scalar::try_from(&input)
            .vortex_unwrap()
            .upper_bound(5)
            .vortex_expect("must have upper bound");

        assert_eq!(got, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    /// When the truncated prefix is empty there is nothing to increment, so no bound exists.
    #[test]
    fn upper_bound_overflow() {
        let input = non_null("🂑🂒🂓");

        let bound = Utf8Scalar::try_from(&input).vortex_unwrap().upper_bound(2);

        assert!(bound.is_none());
    }
}