1#[cfg(feature = "fast_hash")]
19use ahash::{AHashMap, AHashSet};
20#[cfg(not(feature = "fast_hash"))]
21use std::collections::{HashMap, HashSet};
22
23use minarrow::{
24 Bitmask, BooleanArray, CategoricalArray, Integer, IntegerArray, MaskedArray, StringArray,
25 Vec64,
26 aliases::{CategoricalAVT, StringAVT},
27};
28#[cfg(feature = "regex")]
29use regex::Regex;
30
31use crate::errors::KernelError;
32use crate::utils::confirm_mask_capacity;
33use std::marker::PhantomData;
34
35#[inline(always)]
37pub fn string_predicate_masks<'a>(
38 lhs_mask: Option<&'a Bitmask>,
39 rhs_mask: Option<&'a Bitmask>,
40 len: usize,
41) -> (Option<&'a Bitmask>, Option<&'a Bitmask>, Bitmask) {
42 let out = Bitmask::new_set_all(len, false);
43 (lhs_mask, rhs_mask, out)
44}
45
46pub fn concat_str_str<T: Integer>(lhs: StringAVT<T>, rhs: StringAVT<T>) -> StringArray<T> {
70 let (larr, loff, llen) = lhs;
71 let (rarr, roff, rlen) = rhs;
72 let len = llen.min(rlen);
73
74 let (lmask, rmask, mut out_mask) =
75 string_predicate_masks(larr.null_mask.as_ref(), rarr.null_mask.as_ref(), len);
76 let _ = confirm_mask_capacity(larr.len(), lmask);
77 let _ = confirm_mask_capacity(rarr.len(), rmask);
78
79 let mut total_bytes = 0;
81 for i in 0..len {
82 let valid = lmask.map_or(true, |m| unsafe { m.get_unchecked(loff + i) })
83 && rmask.map_or(true, |m| unsafe { m.get_unchecked(roff + i) });
84 if valid {
85 let l = unsafe { larr.get_str_unchecked(loff + i) };
86 let r = unsafe { rarr.get_str_unchecked(roff + i) };
87 total_bytes += l.len() + r.len();
88 }
89 }
90
91 let mut offsets = Vec64::<T>::with_capacity(len + 1);
93 unsafe {
94 offsets.set_len(len + 1);
95 }
96 let mut values = Vec64::<u8>::with_capacity(total_bytes);
97
98 offsets[0] = T::zero();
100 let mut cur = 0;
101
102 for i in 0..len {
103 let valid = lmask.map_or(true, |m| unsafe { m.get_unchecked(loff + i) })
104 && rmask.map_or(true, |m| unsafe { m.get_unchecked(roff + i) });
105
106 if valid {
107 let l = unsafe { larr.get_str_unchecked(loff + i).as_bytes() };
108 let r = unsafe { rarr.get_str_unchecked(roff + i).as_bytes() };
109
110 values.extend_from_slice(l);
111 values.extend_from_slice(r);
112 cur += l.len() + r.len();
113
114 unsafe {
115 out_mask.set_unchecked(i, true);
116 }
117 } else {
118 unsafe {
119 out_mask.set_unchecked(i, false);
120 }
121 }
122
123 offsets[i + 1] = T::from_usize(cur);
124 }
125
126 StringArray {
127 offsets: offsets.into(),
128 data: values.into(),
129 null_mask: Some(out_mask),
130 }
131}
132
133pub fn concat_dict_dict<T: Integer>(
154 lhs: CategoricalAVT<T>,
155 rhs: CategoricalAVT<T>,
156) -> Result<CategoricalArray<T>, KernelError> {
157 let (larr, loff, llen) = lhs;
158 let (rarr, roff, rlen) = rhs;
159 let len = llen.min(rlen);
160
161 let (lmask, rmask, mut out_mask) =
162 string_predicate_masks(larr.null_mask.as_ref(), rarr.null_mask.as_ref(), len);
163 let _ = confirm_mask_capacity(larr.data.len(), lmask)?;
164 let _ = confirm_mask_capacity(rarr.data.len(), rmask)?;
165
166 let mut data = Vec64::<T>::with_capacity(len);
168 unsafe {
169 data.set_len(len);
170 }
171
172 let mut unique_values = Vec64::<String>::with_capacity(len);
173 #[cfg(feature = "fast_hash")]
174 let mut seen: AHashMap<String, T> = AHashMap::with_capacity(len);
175 #[cfg(not(feature = "fast_hash"))]
176 let mut seen: HashMap<String, T> = HashMap::with_capacity(len);
177 let mut unique_idx = 0;
178
179 for i in 0..len {
180 let valid = lmask.map_or(true, |m| unsafe { m.get_unchecked(loff + i) })
181 && rmask.map_or(true, |m| unsafe { m.get_unchecked(roff + i) });
182
183 if valid {
184 let l = unsafe { larr.get_str_unchecked(loff + i) };
185 let r = unsafe { rarr.get_str_unchecked(roff + i) };
186 let cat = format!("{l}{r}");
187
188 let idx = match seen.get(&cat) {
189 Some(ix) => *ix,
190 None => {
191 let ix = T::from_usize(unique_idx);
192 unique_values.push(cat.clone());
193 seen.insert(cat, ix);
194 unique_idx += 1;
195 ix
196 }
197 };
198
199 unsafe {
200 *data.get_unchecked_mut(i) = idx;
201 out_mask.set_unchecked(i, true);
202 }
203 } else {
204 unsafe {
205 *data.get_unchecked_mut(i) = T::zero();
206 out_mask.set_unchecked(i, false);
207 }
208 }
209 }
210
211 unsafe {
212 unique_values.set_len(unique_idx);
213 }
214
215 Ok(CategoricalArray {
216 data: data.into(),
217 unique_values,
218 null_mask: Some(out_mask),
219 })
220}
221
222pub fn concat_str_dict<T: Integer, U: Integer>(
241 lhs: StringAVT<T>,
242 rhs: CategoricalAVT<U>,
243) -> Result<StringArray<T>, KernelError> {
244 let (larr, loff, llen) = lhs;
245 let (rarr, roff, rlen) = rhs;
246 let len = llen.min(rlen);
247
248 let (lmask, rmask, mut out_mask) =
249 string_predicate_masks(larr.null_mask.as_ref(), rarr.null_mask.as_ref(), len);
250 let _ = confirm_mask_capacity(larr.len(), lmask)?;
251 let _ = confirm_mask_capacity(rarr.data.len(), rmask)?;
252
253 let mut total_bytes = 0;
255 for i in 0..len {
256 let valid = lmask.map_or(true, |m| unsafe { m.get_unchecked(loff + i) })
257 && rmask.map_or(true, |m| unsafe { m.get_unchecked(roff + i) });
258 if valid {
259 let a = unsafe { larr.get_str_unchecked(loff + i) };
260 let b = unsafe { rarr.get_str_unchecked(roff + i) };
261 total_bytes += a.len() + b.len();
262 }
263 }
264
265 let mut offsets = Vec64::<T>::with_capacity(len + 1);
267 unsafe {
268 offsets.set_len(len + 1);
269 }
270 let mut values = Vec64::<u8>::with_capacity(total_bytes);
271
272 offsets[0] = T::zero();
274 let mut cur = 0;
275
276 for i in 0..len {
277 let valid = lmask.map_or(true, |m| unsafe { m.get_unchecked(loff + i) })
278 && rmask.map_or(true, |m| unsafe { m.get_unchecked(roff + i) });
279
280 if valid {
281 let a = unsafe { larr.get_str_unchecked(loff + i).as_bytes() };
282 let b = unsafe { rarr.get_str_unchecked(roff + i).as_bytes() };
283
284 values.extend_from_slice(a);
285 values.extend_from_slice(b);
286 cur += a.len() + b.len();
287
288 unsafe {
289 out_mask.set_unchecked(i, true);
290 }
291 } else {
292 unsafe {
293 out_mask.set_unchecked(i, false);
294 }
295 }
296
297 offsets[i + 1] = T::from_usize(cur);
298 }
299
300 Ok(StringArray {
301 offsets: offsets.into(),
302 data: values.into(),
303 null_mask: Some(out_mask),
304 })
305}
306
307pub fn concat_dict_str<T: Integer, U: Integer>(
326 lhs: CategoricalAVT<U>,
327 rhs: StringAVT<T>,
328) -> Result<StringArray<T>, KernelError> {
329 concat_str_dict(rhs, lhs)
330}
331
// Shared row loop for the binary string predicates.
//
// Arguments: row count, lhs mask, rhs mask, output validity mask (mutated in
// place), `(lhs_array, lhs_offset)`, `(rhs_array, rhs_offset)`, and the `str`
// method to apply (for example `contains`).
//
// Evaluates to a `Bitmask` holding one result bit per row. A row is valid only
// when both inputs are valid; invalid rows and rows with an empty pattern
// yield `false`. The macro uses `?`, so it can only be expanded inside a
// function whose error type accepts the error of `confirm_mask_capacity`.
macro_rules! binary_str_pred_loop {
    ($len:expr, $lmask:expr, $rmask:expr, $out_mask:expr, $lhs:expr, $rhs:expr, $method:ident) => {{
        let (hay_arr, hay_off) = $lhs;
        let (pat_arr, pat_off) = $rhs;
        let mut hits = Bitmask::new_set_all($len, false);
        let _ = confirm_mask_capacity(hay_off + $len, $lmask)?;
        let _ = confirm_mask_capacity(pat_off + $len, $rmask)?;
        for row in 0..$len {
            let hay_idx = hay_off + row;
            let pat_idx = pat_off + row;
            let valid = $lmask.map_or(true, |m| unsafe { m.get_unchecked(hay_idx) })
                && $rmask.map_or(true, |m| unsafe { m.get_unchecked(pat_idx) });
            let hit = if valid {
                let text = unsafe { hay_arr.get_str_unchecked(hay_idx) };
                let pattern = unsafe { pat_arr.get_str_unchecked(pat_idx) };
                // An empty pattern is treated as "no match".
                !pattern.is_empty() && text.$method(pattern)
            } else {
                false
            };
            unsafe {
                hits.set_unchecked(row, hit);
                $out_mask.set_unchecked(row, valid);
            }
        }
        hits
    }};
}
358
// Generates a string-vs-string predicate kernel named `$fn_name` that applies
// the `str` method `$method` (for example `contains`) row by row.
macro_rules! str_predicate {
    ($fn_name:ident, $method:ident) => {
        /// Applies the predicate to each row of two string windows.
        ///
        /// The output has `min(lhs.len, rhs.len)` rows and no null mask. A row
        /// is `true` only when both inputs are valid, the pattern (`rhs`) is
        /// non-empty, and the predicate holds; every other row is `false`.
        pub fn $fn_name<T: Integer, U: Integer>(
            lhs: StringAVT<T>,
            rhs: StringAVT<U>,
        ) -> BooleanArray<()> {
            let (hay, hay_off, hay_len) = lhs;
            let (pat, pat_off, pat_len) = rhs;
            let len = std::cmp::min(hay_len, pat_len);
            let hay_mask = hay.null_mask.as_ref();
            let pat_mask = pat.null_mask.as_ref();
            let mut hits = Bitmask::new_set_all(len, false);

            for row in 0..len {
                let h = hay_off + row;
                let p = pat_off + row;
                let hay_ok = hay_mask.map_or(true, |m| unsafe { m.get_unchecked(h) });
                let pat_ok = pat_mask.map_or(true, |m| unsafe { m.get_unchecked(p) });
                if hay_ok && pat_ok {
                    // Slice both values straight out of the offsets/data buffers.
                    let hay_start = hay.offsets[h].to_usize();
                    let hay_end = hay.offsets[h + 1].to_usize();
                    let pat_start = pat.offsets[p].to_usize();
                    let pat_end = pat.offsets[p + 1].to_usize();
                    let text =
                        unsafe { std::str::from_utf8_unchecked(&hay.data[hay_start..hay_end]) };
                    let pattern =
                        unsafe { std::str::from_utf8_unchecked(&pat.data[pat_start..pat_end]) };
                    // Bits start cleared, so only matches need to be written.
                    if !pattern.is_empty() && text.$method(pattern) {
                        unsafe {
                            hits.set_unchecked(row, true);
                        }
                    }
                }
            }

            BooleanArray {
                data: hits.into(),
                null_mask: None,
                len,
                _phantom: PhantomData,
            }
        }
    };
}
426
// Generates a string-vs-categorical predicate kernel named `$fn_name` that
// applies the `str` method `$method` row by row via `binary_str_pred_loop!`.
macro_rules! str_cat_predicate {
    ($fn_name:ident, $method:ident) => {
        /// Applies the predicate to each row of a string window (text) and a
        /// categorical window (pattern).
        ///
        /// The output has `min(lhs.len, rhs.len)` rows. The returned null mask
        /// marks a row valid only when both inputs are valid; invalid rows and
        /// rows with an empty pattern hold `false`.
        ///
        /// # Errors
        ///
        /// Propagates any error raised by the mask capacity checks.
        pub fn $fn_name<T: Integer, U: Integer>(
            lhs: StringAVT<T>,
            rhs: CategoricalAVT<U>,
        ) -> Result<BooleanArray<()>, KernelError> {
            let (text_arr, text_off, text_len) = lhs;
            let (pat_arr, pat_off, pat_len) = rhs;
            let len = std::cmp::min(text_len, pat_len);

            let (text_mask, pat_mask, mut validity) = string_predicate_masks(
                text_arr.null_mask.as_ref(),
                pat_arr.null_mask.as_ref(),
                len,
            );

            let hits = binary_str_pred_loop!(
                len,
                text_mask,
                pat_mask,
                validity,
                (text_arr, text_off),
                (pat_arr, pat_off),
                $method
            );

            Ok(BooleanArray {
                data: hits.into(),
                null_mask: Some(validity),
                len,
                _phantom: PhantomData,
            })
        }
    };
}
478
// Generates a categorical-vs-categorical predicate kernel named `$fn_name`
// that applies the `str` method `$method` row by row via
// `binary_str_pred_loop!`.
macro_rules! cat_cat_predicate {
    ($fn_name:ident, $method:ident) => {
        /// Applies the predicate to each row of two categorical windows
        /// (`lhs` is the text, `rhs` the pattern).
        ///
        /// The output has `min(lhs.len, rhs.len)` rows. The returned null mask
        /// marks a row valid only when both inputs are valid; invalid rows and
        /// rows with an empty pattern hold `false`.
        ///
        /// # Errors
        ///
        /// Propagates any error raised by the mask capacity checks.
        pub fn $fn_name<T: Integer>(
            lhs: CategoricalAVT<T>,
            rhs: CategoricalAVT<T>,
        ) -> Result<BooleanArray<()>, KernelError> {
            let (text_arr, text_off, text_len) = lhs;
            let (pat_arr, pat_off, pat_len) = rhs;
            let len = std::cmp::min(text_len, pat_len);

            let (text_mask, pat_mask, mut validity) = string_predicate_masks(
                text_arr.null_mask.as_ref(),
                pat_arr.null_mask.as_ref(),
                len,
            );

            let hits = binary_str_pred_loop!(
                len,
                text_mask,
                pat_mask,
                validity,
                (text_arr, text_off),
                (pat_arr, pat_off),
                $method
            );

            Ok(BooleanArray {
                data: hits.into(),
                null_mask: Some(validity),
                len,
                _phantom: PhantomData,
            })
        }
    };
}
529
// Generates a categorical-vs-string predicate kernel named `$fn_name` that
// applies the `str` method `$method` (for example `contains`) row by row.
macro_rules! dict_str_predicate {
    ($fn_name:ident, $method:ident) => {
        /// Applies the predicate to each row of a categorical window (haystack)
        /// and a string window (needle).
        ///
        /// The output has `min(lhs.len, rhs.len)` rows. The returned null mask
        /// marks a row valid only when both inputs are valid; invalid rows and
        /// rows with an empty needle hold `false`.
        ///
        /// # Errors
        ///
        /// Propagates any error returned by `confirm_mask_capacity`.
        pub fn $fn_name<T: Integer, U: Integer>(
            lhs: CategoricalAVT<T>,
            rhs: StringAVT<U>,
        ) -> Result<BooleanArray<()>, KernelError> {
            let (hay_arr, hay_off, hay_len) = lhs;
            let (needle_arr, needle_off, needle_len) = rhs;
            let len = std::cmp::min(hay_len, needle_len);

            let (hay_mask, needle_mask, mut validity) = string_predicate_masks(
                hay_arr.null_mask.as_ref(),
                needle_arr.null_mask.as_ref(),
                len,
            );
            let _ = confirm_mask_capacity(hay_arr.data.len(), hay_mask)?;
            let _ = confirm_mask_capacity(needle_arr.len(), needle_mask)?;

            let mut hits = Bitmask::new_set_all(len, false);
            for row in 0..len {
                let h = hay_off + row;
                let n = needle_off + row;
                let valid = hay_mask.map_or(true, |m| unsafe { m.get_unchecked(h) })
                    && needle_mask.map_or(true, |m| unsafe { m.get_unchecked(n) });
                let hit = if valid {
                    let hay = unsafe { hay_arr.get_str_unchecked(h) };
                    let needle = unsafe { needle_arr.get_str_unchecked(n) };
                    // An empty needle is treated as "no match".
                    !needle.is_empty() && hay.$method(needle)
                } else {
                    false
                };
                unsafe {
                    hits.set_unchecked(row, hit);
                    validity.set_unchecked(row, valid);
                }
            }

            Ok(BooleanArray {
                data: hits.into(),
                null_mask: Some(validity),
                len,
                _phantom: PhantomData,
            })
        }
    };
}
586
587str_predicate!(contains_str_str, contains);
588str_predicate!(starts_with_str_str, starts_with);
589str_predicate!(ends_with_str_str, ends_with);
590str_cat_predicate!(contains_str_dict, contains);
591cat_cat_predicate!(contains_dict_dict, contains);
592str_cat_predicate!(starts_with_str_dict, starts_with);
593cat_cat_predicate!(starts_with_dict_dict, starts_with);
594str_cat_predicate!(ends_with_str_dict, ends_with);
595cat_cat_predicate!(ends_with_dict_dict, ends_with);
596dict_str_predicate!(contains_dict_str, contains);
597dict_str_predicate!(starts_with_dict_str, starts_with);
598dict_str_predicate!(ends_with_dict_str, ends_with);
599
// Shared row loop for the regex kernels.
//
// Arguments: row count, lhs mask, rhs mask, output validity mask (mutated in
// place), text array and offset, pattern array and offset.
//
// Evaluates to a `Bitmask` holding one match bit per row. A row is valid only
// when both inputs are valid; invalid rows and rows with an empty pattern
// yield `false`. A non-empty pattern on a valid row that fails to compile
// makes the enclosing function return `KernelError::InvalidArguments`.
//
// The most recently compiled regex is kept and reused while consecutive rows
// carry the same pattern text, so a constant or run-repeated pattern column is
// compiled once per run instead of once per row.
#[cfg(feature = "regex")]
macro_rules! regex_match_loop {
    ($len:expr, $lmask:expr, $rmask:expr, $out_mask:expr, $lhs_arr:expr, $lhs_off:expr, $rhs_arr:expr, $rhs_off:expr) => {{
        let mut data = Bitmask::new_set_all($len, false);
        let _ = confirm_mask_capacity($len + $lhs_off, $lmask)?;
        let _ = confirm_mask_capacity($len + $rhs_off, $rmask)?;
        // (pattern text, compiled regex) of the last pattern that was compiled.
        let mut compiled: Option<(&str, Regex)> = None;
        for i in 0..$len {
            let valid = $lmask.map_or(true, |m| unsafe { m.get_unchecked($lhs_off + i) })
                && $rmask.map_or(true, |m| unsafe { m.get_unchecked($rhs_off + i) });
            let matched = if valid {
                let s = unsafe { $lhs_arr.get_str_unchecked($lhs_off + i) };
                let pat = unsafe { $rhs_arr.get_str_unchecked($rhs_off + i) };
                if pat.is_empty() {
                    false
                } else {
                    let reusable = matches!(&compiled, Some((prev, _)) if *prev == pat);
                    if !reusable {
                        match Regex::new(pat) {
                            Ok(re) => {
                                compiled = Some((pat, re));
                            }
                            Err(_) => {
                                return Err(KernelError::InvalidArguments(
                                    "Invalid regex string".to_string(),
                                ));
                            }
                        }
                    }
                    // `compiled` is always populated by this point.
                    compiled.as_ref().map_or(false, |(_, re)| re.is_match(s))
                }
            } else {
                false
            };
            unsafe { data.set_unchecked(i, matched) };
            unsafe { $out_mask.set_unchecked(i, valid) };
        }
        data
    }};
}
635
/// Matches each row of a string window against the regex held in the same row
/// of another string window.
///
/// The output has `min(lhs.len, rhs.len)` rows. The returned null mask marks a
/// row valid only when both inputs are valid; invalid rows and rows with an
/// empty pattern hold `false`.
///
/// # Errors
///
/// Returns `KernelError::InvalidArguments` when a non-empty pattern on a valid
/// row is not a valid regex, and propagates mask capacity check failures.
#[cfg(feature = "regex")]
pub fn regex_str_str<'a, T: Integer, U: Integer>(
    lhs: StringAVT<'a, T>,
    rhs: StringAVT<'a, U>,
) -> Result<BooleanArray<()>, KernelError> {
    let (text_arr, text_off, text_len) = lhs;
    let (pat_arr, pat_off, pat_len) = rhs;
    let len = std::cmp::min(text_len, pat_len);
    let (text_mask, pat_mask, mut validity) = string_predicate_masks(
        text_arr.null_mask.as_ref(),
        pat_arr.null_mask.as_ref(),
        len,
    );

    let hits = regex_match_loop!(
        len, text_mask, pat_mask, validity, text_arr, text_off, pat_arr, pat_off
    );

    Ok(BooleanArray {
        data: hits.into(),
        null_mask: Some(validity),
        len,
        _phantom: PhantomData,
    })
}
680
/// Matches each row of a categorical window against the regex held in the
/// same row of a string window.
///
/// The output has `min(lhs.len, rhs.len)` rows. The returned null mask marks a
/// row valid only when both inputs are valid; invalid rows and rows with an
/// empty pattern hold `false`.
///
/// # Errors
///
/// Returns `KernelError::InvalidArguments` when a non-empty pattern on a valid
/// row is not a valid regex, and propagates mask capacity check failures.
#[cfg(feature = "regex")]
pub fn regex_dict_str<'a, U: Integer, T: Integer>(
    lhs: CategoricalAVT<'a, U>,
    rhs: StringAVT<'a, T>,
) -> Result<BooleanArray<()>, KernelError> {
    let (text_arr, text_off, text_len) = lhs;
    let (pat_arr, pat_off, pat_len) = rhs;
    let len = std::cmp::min(text_len, pat_len);
    let (text_mask, pat_mask, mut validity) = string_predicate_masks(
        text_arr.null_mask.as_ref(),
        pat_arr.null_mask.as_ref(),
        len,
    );

    let hits = regex_match_loop!(
        len, text_mask, pat_mask, validity, text_arr, text_off, pat_arr, pat_off
    );

    Ok(BooleanArray {
        data: hits.into(),
        null_mask: Some(validity),
        len,
        _phantom: PhantomData,
    })
}
721
/// Matches each row of a string window against the regex held in the same row
/// of a categorical window.
///
/// The output has `min(lhs.len, rhs.len)` rows. The returned null mask marks a
/// row valid only when both inputs are valid; invalid rows and rows with an
/// empty pattern hold `false`.
///
/// # Errors
///
/// Returns `KernelError::InvalidArguments` when a non-empty pattern on a valid
/// row is not a valid regex, and propagates mask capacity check failures.
#[cfg(feature = "regex")]
pub fn regex_str_dict<'a, T: Integer, U: Integer>(
    lhs: StringAVT<'a, T>,
    rhs: CategoricalAVT<'a, U>,
) -> Result<BooleanArray<()>, KernelError> {
    let (text_arr, text_off, text_len) = lhs;
    let (pat_arr, pat_off, pat_len) = rhs;
    let len = std::cmp::min(text_len, pat_len);
    let (text_mask, pat_mask, mut validity) = string_predicate_masks(
        text_arr.null_mask.as_ref(),
        pat_arr.null_mask.as_ref(),
        len,
    );

    let hits = regex_match_loop!(
        len, text_mask, pat_mask, validity, text_arr, text_off, pat_arr, pat_off
    );

    Ok(BooleanArray {
        data: hits.into(),
        null_mask: Some(validity),
        len,
        _phantom: PhantomData,
    })
}
762
/// Matches each row of a categorical window against the regex held in the
/// same row of another categorical window.
///
/// The output has `min(lhs.len, rhs.len)` rows. The returned null mask marks a
/// row valid only when both inputs are valid; invalid rows and rows with an
/// empty pattern hold `false`.
///
/// # Errors
///
/// Returns `KernelError::InvalidArguments` when a non-empty pattern on a valid
/// row is not a valid regex, and propagates mask capacity check failures.
#[cfg(feature = "regex")]
pub fn regex_dict_dict<'a, T: Integer>(
    lhs: CategoricalAVT<'a, T>,
    rhs: CategoricalAVT<'a, T>,
) -> Result<BooleanArray<()>, KernelError> {
    let (text_arr, text_off, text_len) = lhs;
    let (pat_arr, pat_off, pat_len) = rhs;
    let len = std::cmp::min(text_len, pat_len);
    let (text_mask, pat_mask, mut validity) = string_predicate_masks(
        text_arr.null_mask.as_ref(),
        pat_arr.null_mask.as_ref(),
        len,
    );

    let hits = regex_match_loop!(
        len, text_mask, pat_mask, validity, text_arr, text_off, pat_arr, pat_off
    );

    Ok(BooleanArray {
        data: hits.into(),
        null_mask: Some(validity),
        len,
        _phantom: PhantomData,
    })
}
806
807pub fn len_str<'a, T: Integer + Copy>(
820 input: StringAVT<'a, T>,
821) -> Result<IntegerArray<T>, KernelError> {
822 let (array, offset, len) = input;
823 debug_assert!(offset + len <= array.offsets.len() - 1);
824
825 let mask_opt = array.null_mask.as_ref().map(|orig| {
826 let mut m = Bitmask::new_set_all(len, true);
827 for i in 0..len {
828 unsafe {
829 m.set_unchecked(i, orig.get_unchecked(offset + i));
830 }
831 }
832 m
833 });
834
835 let mut data = Vec64::<T>::with_capacity(len);
836 unsafe { data.set_len(len) };
837 for i in 0..len {
838 let valid = mask_opt
839 .as_ref()
840 .map_or(true, |m| unsafe { m.get_unchecked(i) });
841 if valid {
842 let start = array.offsets[offset + i].to_usize();
843 let end = array.offsets[offset + i + 1].to_usize();
844 let s = unsafe { std::str::from_utf8_unchecked(&array.data[start..end]) };
845 data[i] = T::from(s.chars().count()).unwrap();
846 } else {
847 data[i] = T::zero();
848 }
849 }
850
851 Ok(IntegerArray {
852 data: data.into(),
853 null_mask: mask_opt,
854 })
855}
856
857pub fn len_dict<'a, T: Integer>(
870 input: CategoricalAVT<'a, T>,
871) -> Result<IntegerArray<T>, KernelError> {
872 let (array, offset, len) = input;
873 debug_assert!(offset + len <= array.data.len());
874
875 let mask_opt = array.null_mask.as_ref().map(|orig| {
876 let mut m = Bitmask::new_set_all(len, true);
877 for i in 0..len {
878 unsafe {
879 m.set_unchecked(i, orig.get_unchecked(offset + i));
880 }
881 }
882 m
883 });
884
885 let mut data = Vec64::<T>::with_capacity(len);
886 unsafe { data.set_len(len) };
887 for i in 0..len {
888 let valid = mask_opt
889 .as_ref()
890 .map_or(true, |m| unsafe { m.get_unchecked(i) });
891 data[i] = if valid {
892 T::from(
893 unsafe { array.get_str_unchecked(offset + i) }
894 .chars()
895 .count(),
896 )
897 .unwrap()
898 } else {
899 T::zero()
900 };
901 }
902
903 Ok(IntegerArray {
904 data: data.into(),
905 null_mask: mask_opt,
906 })
907}
908
909#[inline]
920pub fn min_string_array<T: Integer>(window: StringAVT<T>) -> Option<String> {
921 let (arr, offset, len) = window;
922 let mut min_str: Option<&str> = None;
923 for i in offset..offset + len {
924 if arr
925 .null_mask
926 .as_ref()
927 .map_or(true, |b| unsafe { b.get_unchecked(i) })
928 {
929 let s = unsafe { arr.get_str_unchecked(i) };
930 if min_str.map_or(true, |min| s < min) {
931 min_str = Some(s);
932 }
933 }
934 }
935 min_str.map(str::to_owned)
936}
937
938#[inline]
949pub fn max_string_array<T: Integer>(window: StringAVT<T>) -> Option<String> {
950 let (arr, offset, len) = window;
951 let mut max_str: Option<&str> = None;
952 for i in offset..offset + len {
953 if arr
954 .null_mask
955 .as_ref()
956 .map_or(true, |b| unsafe { b.get_unchecked(i) })
957 {
958 let s = unsafe { arr.get_str_unchecked(i) };
959 if max_str.map_or(true, |max| s > max) {
960 max_str = Some(s);
961 }
962 }
963 }
964 max_str.map(str::to_owned)
965}
966
967
968#[inline]
979pub fn min_categorical_array<T: Integer>(window: CategoricalAVT<T>) -> Option<String> {
980 let (arr, offset, len) = window;
981 let mut min_code: Option<T> = None;
982 for i in offset..offset + len {
983 if arr
984 .null_mask
985 .as_ref()
986 .map_or(true, |b| unsafe { b.get_unchecked(i) })
987 {
988 let code = arr.data[i];
989 if min_code.map_or(true, |min| {
990 let sc = &arr.unique_values[code.to_usize()];
991 let sm = &arr.unique_values[min.to_usize()];
992 sc < sm
993 }) {
994 min_code = Some(code);
995 }
996 }
997 }
998 min_code.map(|code| arr.unique_values[code.to_usize()].clone())
999}
1000
1001
1002
1003#[inline]
1014pub fn max_categorical_array<T: Integer>(window: CategoricalAVT<T>) -> Option<String> {
1015 let (arr, offset, len) = window;
1016 let mut max_code: Option<T> = None;
1017 for i in offset..offset + len {
1018 if arr
1019 .null_mask
1020 .as_ref()
1021 .map_or(true, |b| unsafe { b.get_unchecked(i) })
1022 {
1023 let code = arr.data[i];
1024 if max_code.map_or(true, |max| {
1025 let sc = &arr.unique_values[code.to_usize()];
1026 let sm = &arr.unique_values[max.to_usize()];
1027 sc > sm
1028 }) {
1029 max_code = Some(code);
1030 }
1031 }
1032 }
1033 max_code.map(|code| arr.unique_values[code.to_usize()].clone())
1034}
1035
1036#[inline(always)]
1050pub fn count_distinct_string<T: Integer>(window: StringAVT<T>) -> usize {
1051 let (arr, offset, len) = window;
1052 #[cfg(feature = "fast_hash")]
1053 let mut set = AHashSet::with_capacity(len);
1054 #[cfg(not(feature = "fast_hash"))]
1055 let mut set = HashSet::with_capacity(len);
1056 let null_mask = arr.null_mask.as_ref();
1057
1058 for i in offset..offset + len {
1059 let valid = null_mask.map_or(true, |b| unsafe { b.get_unchecked(i) });
1060 if valid {
1061 let s = unsafe { arr.get_str_unchecked(i) };
1062 set.insert(s);
1063 if set.len() == len {
1064 break;
1065 }
1066 }
1067 }
1068 set.len()
1069}
1070
1071#[cfg(test)]
1072mod tests {
1073 use minarrow::{CategoricalArray, StringArray, vec64};
1074
1075 use super::*;
1076
1077 fn str_array<T: Integer>(vals: &[&str]) -> StringArray<T> {
1080 StringArray::<T>::from_slice(vals)
1081 }
1082
1083 fn dict_array<T: Integer>(vals: &[&str]) -> CategoricalArray<T> {
1084 let owned: Vec<&str> = vals.to_vec();
1085 CategoricalArray::<T>::from_values(owned)
1086 }
1087
1088 fn bm(bools: &[bool]) -> Bitmask {
1089 Bitmask::from_bools(bools)
1090 }
1091
1092 #[test]
1095 fn test_concat_str_str() {
1096 let a = str_array::<u32>(&["foo", "bar", ""]);
1097 let b = str_array::<u32>(&["baz", "qux", "quux"]);
1098 let out = concat_str_str((&a, 0, a.len()), (&b, 0, b.len()));
1099 assert_eq!(out.get(0), Some("foobaz"));
1100 assert_eq!(out.get(1), Some("barqux"));
1101 assert_eq!(out.get(2), Some("quux"));
1102 assert!(out.null_mask.as_ref().unwrap().all_set());
1103 }
1104
1105 #[test]
1106 fn test_concat_str_str_chunk() {
1107 let a = str_array::<u32>(&["XXX", "foo", "bar", ""]);
1108 let b = str_array::<u32>(&["YYY", "baz", "qux", "quux"]);
1109 let out = concat_str_str((&a, 1, 3), (&b, 1, 3));
1111 assert_eq!(out.get(0), Some("foobaz"));
1112 assert_eq!(out.get(1), Some("barqux"));
1113 assert_eq!(out.get(2), Some("quux"));
1114 assert!(out.null_mask.as_ref().unwrap().all_set());
1115 }
1116
1117 #[test]
1118 fn test_concat_dict_dict() {
1119 let a = dict_array::<u32>(&["x", "y"]);
1120 let b = dict_array::<u32>(&["1", "2"]);
1121 let out = concat_dict_dict((&a, 0, a.len()), (&b, 0, b.len())).unwrap();
1122 let s0 = out.get(0).unwrap();
1123 let s1 = out.get(1).unwrap();
1124 assert!(["x1", "y2"].contains(&s0));
1125 assert!(["x1", "y2"].contains(&s1));
1126 assert!(out.null_mask.as_ref().unwrap().all_set());
1127 }
1128
1129 #[test]
1130 fn test_concat_dict_dict_chunk() {
1131 let a = dict_array::<u32>(&["foo", "x", "y", "bar"]);
1132 let b = dict_array::<u32>(&["A", "1", "2", "B"]);
1133 let out = concat_dict_dict((&a, 1, 2), (&b, 1, 2)).unwrap();
1134 let s0 = out.get(0).unwrap();
1135 let s1 = out.get(1).unwrap();
1136 assert!(["x1", "y2"].contains(&s0));
1137 assert!(["x1", "y2"].contains(&s1));
1138 assert!(out.null_mask.as_ref().unwrap().all_set());
1139 }
1140
1141 #[test]
1142 fn test_concat_str_dict() {
1143 let a = str_array::<u32>(&["ab", "cd", ""]);
1144 let b = dict_array::<u32>(&["xy", "zq", ""]);
1145 let out = concat_str_dict((&a, 0, a.len()), (&b, 0, b.len())).unwrap();
1146 assert_eq!(out.get(0), Some("abxy"));
1147 assert_eq!(out.get(1), Some("cdzq"));
1148 assert_eq!(out.get(2), Some(""));
1149 assert!(out.null_mask.as_ref().unwrap().all_set());
1150 }
1151
1152 #[test]
1153 fn test_concat_str_dict_chunk() {
1154 let a = str_array::<u32>(&["dummy", "ab", "cd", ""]);
1155 let b = dict_array::<u32>(&["dummy", "xy", "zq", ""]);
1156 let out = concat_str_dict((&a, 1, 3), (&b, 1, 3)).unwrap();
1157 assert_eq!(out.get(0), Some("abxy"));
1158 assert_eq!(out.get(1), Some("cdzq"));
1159 assert_eq!(out.get(2), Some(""));
1160 assert!(out.null_mask.as_ref().unwrap().all_set());
1161 }
1162
1163 #[test]
1164 fn test_concat_dict_str() {
1165 let a = dict_array::<u32>(&["hi", "ho"]);
1166 let b = str_array::<u32>(&["yo", "no"]);
1167 let out = concat_dict_str((&a, 0, a.len()), (&b, 0, b.len())).unwrap();
1168 assert_eq!(out.get(0), Some("yohi"));
1169 assert_eq!(out.get(1), Some("noho"));
1170 assert!(out.null_mask.as_ref().unwrap().all_set());
1171 }
1172
1173 #[test]
1174 fn test_concat_dict_str_chunk() {
1175 let a = dict_array::<u32>(&["dummy", "hi", "ho", "zzz"]);
1176 let b = str_array::<u32>(&["dummy", "yo", "no", "xxx"]);
1177 let out = concat_dict_str((&a, 1, 2), (&b, 1, 2)).unwrap();
1178 assert_eq!(out.get(0), Some("yohi"));
1179 assert_eq!(out.get(1), Some("noho"));
1180 assert!(out.null_mask.as_ref().unwrap().all_set());
1181 }
1182
1183 #[test]
1186 fn test_contains_str_str() {
1187 let s = str_array::<u32>(&["abc", "def", "ghijk"]);
1188 let p = str_array::<u32>(&["b", "x", "jk"]);
1189 let out = contains_str_str((&s, 0, s.len()), (&p, 0, p.len()));
1190 assert_eq!(out.get(0), Some(true));
1191 assert_eq!(out.get(1), Some(false));
1192 assert_eq!(out.get(2), Some(true));
1193 }
1194
1195 #[test]
1196 fn test_contains_str_str_chunk() {
1197 let s = str_array::<u32>(&["dummy", "abc", "def", "ghijk"]);
1198 let p = str_array::<u32>(&["dummy", "b", "x", "jk"]);
1199 let out = contains_str_str((&s, 1, 3), (&p, 1, 3));
1200 assert_eq!(out.get(0), Some(true));
1201 assert_eq!(out.get(1), Some(false));
1202 assert_eq!(out.get(2), Some(true));
1203 }
1204
1205 #[test]
1206 fn test_starts_with_str_str() {
1207 let s = str_array::<u32>(&["apricot", "banana", "apple"]);
1208 let p = str_array::<u32>(&["ap", "ba", "a"]);
1209 let out = starts_with_str_str((&s, 0, s.len()), (&p, 0, p.len()));
1210 assert_eq!(out.get(0), Some(true));
1211 assert_eq!(out.get(1), Some(true));
1212 assert_eq!(out.get(2), Some(true));
1213 }
1214
1215 #[test]
1216 fn test_starts_with_str_str_chunk() {
1217 let s = str_array::<u32>(&["dummy", "apricot", "banana", "apple"]);
1218 let p = str_array::<u32>(&["dummy", "ap", "ba", "a"]);
1219 let out = starts_with_str_str((&s, 1, 3), (&p, 1, 3));
1220 assert_eq!(out.get(0), Some(true));
1221 assert_eq!(out.get(1), Some(true));
1222 assert_eq!(out.get(2), Some(true));
1223 }
1224
1225 #[test]
1226 fn test_ends_with_str_str() {
1227 let s = str_array::<u32>(&["robot", "fast", "last"]);
1228 let p = str_array::<u32>(&["ot", "st", "ast"]);
1229 let out = ends_with_str_str((&s, 0, s.len()), (&p, 0, p.len()));
1230 assert_eq!(out.get(0), Some(true));
1231 assert_eq!(out.get(1), Some(true));
1232 assert_eq!(out.get(2), Some(true));
1233 }
1234
1235 #[test]
1236 fn test_ends_with_str_str_chunk() {
1237 let s = str_array::<u32>(&["dummy", "robot", "fast", "last"]);
1238 let p = str_array::<u32>(&["dummy", "ot", "st", "ast"]);
1239 let out = ends_with_str_str((&s, 1, 3), (&p, 1, 3));
1240 assert_eq!(out.get(0), Some(true));
1241 assert_eq!(out.get(1), Some(true));
1242 assert_eq!(out.get(2), Some(true));
1243 }
1244
1245 #[test]
1246 fn test_contains_str_dict() {
1247 let s = str_array::<u32>(&["abcde", "xyz", "qrstuv"]);
1248 let p = dict_array::<u32>(&["c", "z", "tu"]);
1249 let out = contains_str_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1250 assert_eq!(out.get(0), Some(true));
1251 assert_eq!(out.get(1), Some(true));
1252 assert_eq!(out.get(2), Some(true));
1253 }
1254
1255 #[test]
1256 fn test_contains_str_dict_chunk() {
1257 let s = str_array::<u32>(&["dummy", "abcde", "xyz", "qrstuv"]);
1258 let p = dict_array::<u32>(&["dummy", "c", "z", "tu"]);
1259 let out = contains_str_dict((&s, 1, 3), (&p, 1, 3)).unwrap();
1260 assert_eq!(out.get(0), Some(true));
1261 assert_eq!(out.get(1), Some(true));
1262 assert_eq!(out.get(2), Some(true));
1263 }
1264
1265 #[test]
1266 fn test_contains_dict_dict() {
1267 let s = dict_array::<u32>(&["cdef", "foo", "bar"]);
1268 let p = dict_array::<u32>(&["cd", "oo", "baz"]);
1269 let out = contains_dict_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1270 assert_eq!(out.get(0), Some(true));
1271 assert_eq!(out.get(1), Some(true));
1272 assert_eq!(out.get(2), Some(false));
1273 }
1274
1275 #[test]
1276 fn test_contains_dict_dict_chunk() {
1277 let s = dict_array::<u32>(&["dummy", "cdef", "foo", "bar"]);
1278 let p = dict_array::<u32>(&["dummy", "cd", "oo", "baz"]);
1279 let out = contains_dict_dict((&s, 1, 3), (&p, 1, 3)).unwrap();
1280 assert_eq!(out.get(0), Some(true));
1281 assert_eq!(out.get(1), Some(true));
1282 assert_eq!(out.get(2), Some(false));
1283 }
1284
1285 #[test]
1286 fn test_contains_dict_str() {
1287 let s = dict_array::<u32>(&["hello", "world"]);
1288 let p = str_array::<u32>(&["he", "o"]);
1289 let out = contains_dict_str((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1290 assert_eq!(out.get(0), Some(true));
1291 assert_eq!(out.get(1), Some(true));
1292 }
1293
1294 #[test]
1295 fn test_contains_dict_str_chunk() {
1296 let s = dict_array::<u32>(&["dummy", "hello", "world"]);
1297 let p = str_array::<u32>(&["dummy", "he", "o"]);
1298 let out = contains_dict_str((&s, 1, 2), (&p, 1, 2)).unwrap();
1299 assert_eq!(out.get(0), Some(true));
1300 assert_eq!(out.get(1), Some(true));
1301 }
1302
1303 #[test]
1304 fn test_starts_with_str_dict() {
1305 let s = str_array::<u32>(&["abcdef", "foobar", "quux"]);
1306 let p = dict_array::<u32>(&["ab", "foo", "qu"]);
1307 let out = starts_with_str_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1308 assert_eq!(out.get(0), Some(true));
1309 assert_eq!(out.get(1), Some(true));
1310 assert_eq!(out.get(2), Some(true));
1311 }
1312
1313 #[test]
1314 fn test_starts_with_str_dict_chunk() {
1315 let s = str_array::<u32>(&["dummy", "abcdef", "foobar", "quux"]);
1316 let p = dict_array::<u32>(&["dummy", "ab", "foo", "qu"]);
1317 let out = starts_with_str_dict((&s, 1, 3), (&p, 1, 3)).unwrap();
1318 assert_eq!(out.get(0), Some(true));
1319 assert_eq!(out.get(1), Some(true));
1320 assert_eq!(out.get(2), Some(true));
1321 }
1322
1323 #[test]
1324 fn test_starts_with_dict_dict() {
1325 let s = dict_array::<u32>(&["qwerty", "banana"]);
1326 let p = dict_array::<u32>(&["qw", "ban"]);
1327 let out = starts_with_dict_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1328 assert_eq!(out.get(0), Some(true));
1329 assert_eq!(out.get(1), Some(true));
1330 }
1331
1332 #[test]
1333 fn test_starts_with_dict_dict_chunk() {
1334 let s = dict_array::<u32>(&["dummy", "qwerty", "banana"]);
1335 let p = dict_array::<u32>(&["dummy", "qw", "ban"]);
1336 let out = starts_with_dict_dict((&s, 1, 2), (&p, 1, 2)).unwrap();
1337 assert_eq!(out.get(0), Some(true));
1338 assert_eq!(out.get(1), Some(true));
1339 }
1340
1341 #[test]
1342 fn test_ends_with_str_dict() {
1343 let s = str_array::<u32>(&["poem", "dome", "gnome"]);
1344 let p = dict_array::<u32>(&["em", "me", "ome"]);
1345 let out = ends_with_str_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1346 assert_eq!(out.get(0), Some(true));
1347 assert_eq!(out.get(1), Some(true));
1348 assert_eq!(out.get(2), Some(true));
1349 }
1350
1351 #[test]
1352 fn test_ends_with_str_dict_chunk() {
1353 let s = str_array::<u32>(&["dummy", "poem", "dome", "gnome"]);
1354 let p = dict_array::<u32>(&["dummy", "em", "me", "ome"]);
1355 let out = ends_with_str_dict((&s, 1, 3), (&p, 1, 3)).unwrap();
1356 assert_eq!(out.get(0), Some(true));
1357 assert_eq!(out.get(1), Some(true));
1358 assert_eq!(out.get(2), Some(true));
1359 }
1360
1361 #[test]
1362 fn test_ends_with_dict_dict() {
1363 let s = dict_array::<u32>(&["tablet", "let", "bet"]);
1364 let p = dict_array::<u32>(&["let", "et", "xyz"]);
1365 let out = ends_with_dict_dict((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1366 assert_eq!(out.get(0), Some(true));
1367 assert_eq!(out.get(1), Some(true));
1368 assert_eq!(out.get(2), Some(false));
1369 }
1370
1371 #[test]
1372 fn test_ends_with_dict_dict_chunk() {
1373 let s = dict_array::<u32>(&["dummy", "tablet", "let", "bet"]);
1374 let p = dict_array::<u32>(&["dummy", "let", "et", "xyz"]);
1375 let out = ends_with_dict_dict((&s, 1, 3), (&p, 1, 3)).unwrap();
1376 assert_eq!(out.get(0), Some(true));
1377 assert_eq!(out.get(1), Some(true));
1378 assert_eq!(out.get(2), Some(false));
1379 }
1380
1381 #[test]
1384 fn test_len_str() {
1385 let arr = str_array::<u32>(&["", "a", "abc", "bar"]);
1386 let out = len_str((&arr, 0, arr.len())).unwrap();
1387 assert_eq!(&out.data[..], &[0, 1, 3, 3]);
1388 }
1389
1390 #[test]
1391 fn test_len_str_chunk() {
1392 let arr = str_array::<u32>(&["zzz", "", "a", "abc", "bar"]);
1393 let out = len_str((&arr, 1, 4)).unwrap(); assert_eq!(&out.data[..], &[0, 1, 3, 3]);
1395 }
1396
1397 #[test]
1398 fn test_len_dict() {
1399 let arr = dict_array::<u32>(&["", "one", "seven"]);
1400 let out = len_dict((&arr, 0, arr.len())).unwrap();
1401 assert_eq!(&out.data[..], &[0, 3, 5]);
1402 }
1403
1404 #[test]
1405 fn test_len_dict_chunk() {
1406 let arr = dict_array::<u32>(&["q", "", "one", "seven"]);
1407 let out = len_dict((&arr, 1, 3)).unwrap(); assert_eq!(&out.data[..], &[0, 3, 5]);
1409 }
1410
1411 #[test]
1412 fn test_contains_empty_pattern() {
1413 let s = str_array::<u32>(&["foo", "bar"]);
1414 let p = str_array::<u32>(&["", ""]);
1415 let out = contains_str_str((&s, 0, s.len()), (&p, 0, p.len()));
1416 assert_eq!(out.get(0), Some(false));
1418 assert_eq!(out.get(1), Some(false));
1419 assert!(out.null_mask.as_ref().is_none());
1420 }
1421
1422 #[test]
1423 fn test_contains_empty_pattern_chunk() {
1424 let s = str_array::<u32>(&["z", "foo", "bar"]);
1425 let p = str_array::<u32>(&["z", "", ""]);
1426 let out = contains_str_str((&s, 1, 2), (&p, 1, 2));
1427 assert_eq!(out.get(0), Some(false));
1428 assert_eq!(out.get(1), Some(false));
1429 assert!(out.null_mask.as_ref().is_none());
1430 }
1431
1432 #[test]
1433 fn test_contains_str_str_nulls_on_pattern() {
1434 let mut s = str_array::<u32>(&["abc", "def"]);
1435 s.null_mask = Some(bm(&[true, true]));
1436 let mut p = str_array::<u32>(&["b", "e"]);
1437 p.null_mask = Some(bm(&[true, false])); let out = contains_str_str((&s, 0, s.len()), (&p, 0, p.len()));
1439 assert_eq!(out.get(0), Some(true));
1440 assert_eq!(out.get(1), Some(false));
1441 }
1442
1443 #[test]
1444 fn test_contains_str_str_nulls_on_pattern_chunk() {
1445 let mut s = str_array::<u32>(&["X", "abc", "def"]);
1446 s.null_mask = Some(bm(&[true, true, true]));
1447 let mut p = str_array::<u32>(&["X", "b", "e"]);
1448 p.null_mask = Some(bm(&[true, true, false])); let out = contains_str_str((&s, 1, 2), (&p, 1, 2));
1450 assert_eq!(out.get(0), Some(true));
1451 assert_eq!(out.get(1), Some(false));
1452 }
1453
1454 #[cfg(feature = "regex")]
1455 #[test]
1456 fn test_regex_invalid_pattern_returns_err() {
1457 let s = str_array::<u32>(&["abc"]);
1458 let p = str_array::<u32>(&["["]);
1459 let err = regex_str_str((&s, 0, s.len()), (&p, 0, p.len())).unwrap_err();
1460 match err {
1461 KernelError::InvalidArguments(_) => {}
1462 _ => panic!("expected InvalidArguments"),
1463 }
1464 }
1465
1466 #[cfg(feature = "regex")]
1467 #[test]
1468 fn test_regex_invalid_pattern_returns_err_chunk() {
1469 let s = str_array::<u32>(&["foo", "abc"]);
1470 let p = str_array::<u32>(&["bar", "["]);
1471 let err = regex_str_str((&s, 1, 1), (&p, 1, 1)).unwrap_err();
1472 match err {
1473 KernelError::InvalidArguments(_) => {}
1474 _ => panic!("expected InvalidArguments"),
1475 }
1476 }
1477
1478 #[cfg(feature = "regex")]
1479 #[test]
1480 fn test_regex_empty_pattern_always_false() {
1481 let s = str_array::<u32>(&["abc", "def"]);
1482 let p = str_array::<u32>(&["", ""]);
1483 let out = regex_str_str((&s, 0, s.len()), (&p, 0, p.len())).unwrap();
1484 assert_eq!(out.get(0), Some(false));
1485 assert_eq!(out.get(1), Some(false));
1486 assert!(out.null_mask.unwrap().all_set());
1487 }
1488
1489 #[cfg(feature = "regex")]
1490 #[test]
1491 fn test_regex_empty_pattern_always_false_chunk() {
1492 let s = str_array::<u32>(&["z", "abc", "def"]);
1493 let p = str_array::<u32>(&["z", "", ""]);
1494 let out = regex_str_str((&s, 1, 2), (&p, 1, 2)).unwrap();
1495 assert_eq!(out.get(0), Some(false));
1496 assert_eq!(out.get(1), Some(false));
1497 assert!(out.null_mask.unwrap().all_set());
1498 }
1499
1500 #[test]
1501 fn test_len_str_with_nulls() {
1502 let mut arr = str_array::<u32>(&["foo", "", "bar"]);
1503 arr.null_mask = Some(bm(&[true, false, true]));
1504 let len_arr = len_str((&arr, 0, arr.len())).unwrap();
1505 assert_eq!(len_arr.data.as_slice(), &[3, 0, 3]);
1506 assert_eq!(
1507 len_arr.null_mask.unwrap().as_slice(),
1508 bm(&[true, false, true]).as_slice()
1509 );
1510 }
1511
1512 #[test]
1513 fn test_len_str_with_nulls_chunk() {
1514 let mut arr = str_array::<u32>(&["x", "foo", "", "bar"]);
1515 arr.null_mask = Some(bm(&[true, true, false, true]));
1516 let len_arr = len_str((&arr, 1, 3)).unwrap();
1517 assert_eq!(len_arr.data.as_slice(), &[3, 0, 3]);
1518 assert_eq!(
1519 len_arr.null_mask.unwrap().as_slice(),
1520 bm(&[true, false, true]).as_slice()
1521 );
1522 }
1523
1524 #[test]
1525 fn test_len_dict_with_nulls() {
1526 let mut arr = dict_array::<u32>(&["x", "yy", "zzz"]);
1527 arr.null_mask = Some(bm(&[false, true, true]));
1528 let len_arr = len_dict((&arr, 0, arr.len())).unwrap();
1529 assert_eq!(len_arr.data.as_slice(), &[0, 2, 3]);
1530 assert_eq!(
1531 len_arr.null_mask.unwrap().as_slice(),
1532 bm(&[false, true, true]).as_slice()
1533 );
1534 }
1535
1536 #[test]
1537 fn test_len_dict_with_nulls_chunk() {
1538 let mut arr = dict_array::<u32>(&["z", "x", "yy", "zzz"]);
1539 arr.null_mask = Some(bm(&[true, false, true, true]));
1540 let len_arr = len_dict((&arr, 1, 3)).unwrap();
1541 assert_eq!(len_arr.data.as_slice(), &[0, 2, 3]);
1542 assert_eq!(
1543 len_arr.null_mask.unwrap().as_slice(),
1544 bm(&[false, true, true]).as_slice()
1545 );
1546 }
1547
1548 fn bitmask_from_vec(v: &[bool]) -> Bitmask {
1549 let mut bm = Bitmask::with_capacity(v.len());
1550 for (i, &b) in v.iter().enumerate() {
1551 bm.set(i, b);
1552 }
1553 bm
1554 }
1555
1556 #[test]
1557 fn test_min_string_array_all_valid() {
1558 let arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1559 let view = (&arr, 0, arr.len());
1560 let result = min_string_array::<u32>(view);
1561 assert_eq!(result, Some("alpha".to_string()));
1562 }
1563
1564 #[test]
1565 fn test_min_string_array_with_nulls() {
1566 let mut arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1567 arr.null_mask = Some(bitmask_from_vec(&[false, true, true, true]));
1568 let view = (&arr, 0, arr.len());
1569 let result = min_string_array::<u32>(view);
1570 assert_eq!(result, Some("alpha".to_string()));
1571 }
1572
1573 #[test]
1574 fn test_min_string_array_all_null() {
1575 let arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1576 let mut null_mask = Bitmask::with_capacity(arr.len());
1577 for i in 0..arr.len() {
1578 null_mask.set(i, false);
1579 }
1580 let arr = StringArray::<u32> {
1581 null_mask: Some(null_mask),
1582 ..arr
1583 };
1584 let view = (&arr, 0, arr.len());
1585 let result = min_string_array::<u32>(view);
1586 assert_eq!(result, None);
1587 }
1588
1589 #[test]
1590 fn test_max_string_array_all_valid() {
1591 let arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1592 let view = (&arr, 0, arr.len());
1593 let result = max_string_array::<u32>(view);
1594 assert_eq!(result, Some("zulu".to_string()));
1595 }
1596
1597 #[test]
1598 fn test_max_string_array_with_nulls() {
1599 let mut arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1600 arr.null_mask = Some(bitmask_from_vec(&[true, false, true, false]));
1601 let view = (&arr, 0, arr.len());
1602 let result = max_string_array::<u32>(view);
1603 assert_eq!(result, Some("zulu".to_string()));
1604 }
1605
1606 #[test]
1607 fn test_max_string_array_all_null() {
1608 let arr = StringArray::<u32>::from_slice(&["zulu", "alpha", "echo", "bravo"]);
1609 let mut null_mask = Bitmask::with_capacity(arr.len());
1610 for i in 0..arr.len() {
1611 null_mask.set(i, false);
1612 }
1613 let arr = StringArray::<u32> {
1614 null_mask: Some(null_mask),
1615 ..arr
1616 };
1617 let view = (&arr, 0, arr.len());
1618 let result = max_string_array::<u32>(view);
1619 assert_eq!(result, None);
1620 }
1621
1622 #[test]
1623 fn test_min_categorical_array() {
1624 let uniques = vec64![
1625 "dog".to_string(),
1626 "zebra".to_string(),
1627 "ant".to_string(),
1628 "bee".to_string()
1629 ];
1630 let indices = vec64![1u32, 0, 3, 2]; let cat = CategoricalArray {
1632 data: indices.clone().into(),
1633 unique_values: uniques.clone().into(),
1634 null_mask: None,
1635 };
1636 let result = min_categorical_array((&cat, 0, indices.len()));
1637 assert_eq!(result, Some("ant".to_string()));
1638 }
1639
1640 #[test]
1641 fn test_max_categorical_array() {
1642 let uniques = vec64![
1643 "dog".to_string(),
1644 "zebra".to_string(),
1645 "ant".to_string(),
1646 "bee".to_string()
1647 ];
1648 let indices = vec64![2u32, 0, 1, 3]; let cat = CategoricalArray {
1650 data: indices.clone().into(),
1651 unique_values: uniques.clone().into(),
1652 null_mask: None,
1653 };
1654 let result = max_categorical_array((&cat, 0, indices.len()));
1655 assert_eq!(result, Some("zebra".to_string()));
1656 }
1657
1658 #[test]
1659 fn test_min_categorical_array_with_nulls() {
1660 let uniques = vec64!["dog".to_string(), "zebra".to_string(), "ant".to_string()];
1661 let indices = vec64![1u32, 2, 0];
1662 let mut null_mask = Bitmask::with_capacity(indices.len());
1663 null_mask.set(0, true);
1664 null_mask.set(1, false);
1665 null_mask.set(2, true);
1666 let cat = CategoricalArray {
1667 data: indices.clone().into(),
1668 unique_values: uniques.clone().into(),
1669 null_mask: Some(null_mask),
1670 };
1671 let result = min_categorical_array((&cat, 0, indices.len()));
1672 assert_eq!(result, Some("dog".to_string())); }
1674
1675 #[test]
1676 fn test_max_categorical_array_with_nulls() {
1677 let uniques = vec64!["dog".to_string(), "zebra".to_string(), "ant".to_string()];
1678 let indices = vec64![1u32, 2, 0];
1679 let mut null_mask = Bitmask::with_capacity(indices.len());
1680 null_mask.set(0, true);
1681 null_mask.set(1, false);
1682 null_mask.set(2, true);
1683 let cat = CategoricalArray {
1684 data: indices.clone().into(),
1685 unique_values: uniques.clone().into(),
1686 null_mask: Some(null_mask),
1687 };
1688 let result = max_categorical_array((&cat, 0, indices.len()));
1689 assert_eq!(result, Some("zebra".to_string())); }
1691}