1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_err};
26use crate::{DataFusionError, Result, ScalarValue};
27use arrow::array::{
28 cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29 OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34use sqlparser::ast::Ident;
35use sqlparser::dialect::GenericDialect;
36use sqlparser::parser::Parser;
37use std::borrow::{Borrow, Cow};
38use std::cmp::{min, Ordering};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45pub fn project_schema(
75 schema: &SchemaRef,
76 projection: Option<&Vec<usize>>,
77) -> Result<SchemaRef> {
78 let schema = match projection {
79 Some(columns) => Arc::new(schema.project(columns)?),
80 None => Arc::clone(schema),
81 };
82 Ok(schema)
83}
84
85pub fn extract_row_at_idx_to_buf(
87 columns: &[ArrayRef],
88 idx: usize,
89 buf: &mut Vec<ScalarValue>,
90) -> Result<()> {
91 buf.clear();
92
93 let iter = columns
94 .iter()
95 .map(|arr| ScalarValue::try_from_array(arr, idx));
96 for v in iter.into_iter() {
97 buf.push(v?);
98 }
99
100 Ok(())
101}
102pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
104 columns
105 .iter()
106 .map(|arr| ScalarValue::try_from_array(arr, idx))
107 .collect()
108}
109
110pub fn compare_rows(
112 x: &[ScalarValue],
113 y: &[ScalarValue],
114 sort_options: &[SortOptions],
115) -> Result<Ordering> {
116 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
117 for ((lhs, rhs), sort_options) in zip_it {
119 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
121 (true, false, false) | (false, true, true) => Ordering::Greater,
122 (true, false, true) | (false, true, false) => Ordering::Less,
123 (false, false, _) => {
124 if sort_options.descending {
125 rhs.try_cmp(lhs)?
126 } else {
127 lhs.try_cmp(rhs)?
128 }
129 }
130 (true, true, _) => continue,
131 };
132 if result != Ordering::Equal {
133 return Ok(result);
134 }
135 }
136 Ok(Ordering::Equal)
137}
138
139pub fn bisect<const SIDE: bool>(
144 item_columns: &[ArrayRef],
145 target: &[ScalarValue],
146 sort_options: &[SortOptions],
147) -> Result<usize> {
148 let low: usize = 0;
149 let high: usize = item_columns
150 .first()
151 .ok_or_else(|| {
152 DataFusionError::Internal("Column array shouldn't be empty".to_string())
153 })?
154 .len();
155 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
156 let cmp = compare_rows(current, target, sort_options)?;
157 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
158 };
159 find_bisect_point(item_columns, target, compare_fn, low, high)
160}
161
162pub fn find_bisect_point<F>(
169 item_columns: &[ArrayRef],
170 target: &[ScalarValue],
171 compare_fn: F,
172 mut low: usize,
173 mut high: usize,
174) -> Result<usize>
175where
176 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
177{
178 while low < high {
179 let mid = ((high - low) / 2) + low;
180 let val = get_row_at_idx(item_columns, mid)?;
181 if compare_fn(&val, target)? {
182 low = mid + 1;
183 } else {
184 high = mid;
185 }
186 }
187 Ok(low)
188}
189
190pub fn linear_search<const SIDE: bool>(
195 item_columns: &[ArrayRef],
196 target: &[ScalarValue],
197 sort_options: &[SortOptions],
198) -> Result<usize> {
199 let low: usize = 0;
200 let high: usize = item_columns
201 .first()
202 .ok_or_else(|| {
203 DataFusionError::Internal("Column array shouldn't be empty".to_string())
204 })?
205 .len();
206 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
207 let cmp = compare_rows(current, target, sort_options)?;
208 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
209 };
210 search_in_slice(item_columns, target, compare_fn, low, high)
211}
212
213pub fn search_in_slice<F>(
218 item_columns: &[ArrayRef],
219 target: &[ScalarValue],
220 compare_fn: F,
221 mut low: usize,
222 high: usize,
223) -> Result<usize>
224where
225 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
226{
227 while low < high {
228 let val = get_row_at_idx(item_columns, low)?;
229 if !compare_fn(&val, target)? {
230 break;
231 }
232 low += 1;
233 }
234 Ok(low)
235}
236
237pub fn evaluate_partition_ranges(
242 num_rows: usize,
243 partition_columns: &[SortColumn],
244) -> Result<Vec<Range<usize>>> {
245 Ok(if partition_columns.is_empty() {
246 vec![Range {
247 start: 0,
248 end: num_rows,
249 }]
250 } else {
251 let cols: Vec<_> = partition_columns
252 .iter()
253 .map(|x| Arc::clone(&x.values))
254 .collect();
255 partition(&cols)?.ranges()
256 })
257}
258
259pub fn quote_identifier(s: &str) -> Cow<str> {
264 if needs_quotes(s) {
265 Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
266 } else {
267 Cow::Borrowed(s)
268 }
269}
270
271fn needs_quotes(s: &str) -> bool {
273 let mut chars = s.chars();
274
275 if let Some(first_char) = chars.next() {
277 if !(first_char.is_ascii_lowercase() || first_char == '_') {
278 return true;
279 }
280 }
281
282 !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
283}
284
285pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
286 let dialect = GenericDialect;
287 let mut parser = Parser::new(&dialect).try_with_sql(s)?;
288 let idents = parser.parse_multipart_identifier()?;
289 Ok(idents)
290}
291
292pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
293 parse_identifiers(s)
294 .unwrap_or_default()
295 .into_iter()
296 .map(|id| match id.quote_style {
297 Some(_) => id.value,
298 None if ignore_case => id.value,
299 _ => id.value.to_ascii_lowercase(),
300 })
301 .collect::<Vec<_>>()
302}
303
304pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
306 items: &[T],
307 indices: impl IntoIterator<Item = I>,
308) -> Result<Vec<T>> {
309 indices
310 .into_iter()
311 .map(|idx| items.get(*idx.borrow()).cloned())
312 .collect::<Option<Vec<T>>>()
313 .ok_or_else(|| {
314 DataFusionError::Execution(
315 "Expects indices to be in the range of searched vector".to_string(),
316 )
317 })
318}
319
320pub fn longest_consecutive_prefix<T: Borrow<usize>>(
326 sequence: impl IntoIterator<Item = T>,
327) -> usize {
328 let mut count = 0;
329 for item in sequence {
330 if !count.eq(item.borrow()) {
331 break;
332 }
333 count += 1;
334 }
335 count
336}
337
338#[derive(Debug, Clone)]
358pub struct SingleRowListArrayBuilder {
359 arr: ArrayRef,
361 nullable: bool,
363 field_name: Option<String>,
366}
367
368impl SingleRowListArrayBuilder {
369 pub fn new(arr: ArrayRef) -> Self {
371 Self {
372 arr,
373 nullable: true,
374 field_name: None,
375 }
376 }
377
378 pub fn with_nullable(mut self, nullable: bool) -> Self {
380 self.nullable = nullable;
381 self
382 }
383
384 pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
386 self.field_name = field_name;
387 self
388 }
389
390 pub fn with_field(self, field: &Field) -> Self {
392 self.with_field_name(Some(field.name().to_owned()))
393 .with_nullable(field.is_nullable())
394 }
395
396 pub fn build_list_array(self) -> ListArray {
398 let (field, arr) = self.into_field_and_arr();
399 let offsets = OffsetBuffer::from_lengths([arr.len()]);
400 ListArray::new(field, offsets, arr, None)
401 }
402
403 pub fn build_list_scalar(self) -> ScalarValue {
405 ScalarValue::List(Arc::new(self.build_list_array()))
406 }
407
408 pub fn build_large_list_array(self) -> LargeListArray {
410 let (field, arr) = self.into_field_and_arr();
411 let offsets = OffsetBuffer::from_lengths([arr.len()]);
412 LargeListArray::new(field, offsets, arr, None)
413 }
414
415 pub fn build_large_list_scalar(self) -> ScalarValue {
417 ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
418 }
419
420 pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
422 let (field, arr) = self.into_field_and_arr();
423 FixedSizeListArray::new(field, list_size as i32, arr, None)
424 }
425
426 pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
428 ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
429 }
430
431 fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
433 let Self {
434 arr,
435 nullable,
436 field_name,
437 } = self;
438 let data_type = arr.data_type().to_owned();
439 let field = match field_name {
440 Some(name) => Field::new(name, data_type, nullable),
441 None => Field::new_list_field(data_type, nullable),
442 };
443 (Arc::new(field), arr)
444 }
445}
446
447#[deprecated(
451 since = "44.0.0",
452 note = "please use `SingleRowListArrayBuilder` instead"
453)]
454pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray {
455 SingleRowListArrayBuilder::new(arr)
456 .with_nullable(true)
457 .build_list_array()
458}
459
460#[deprecated(
463 since = "44.0.0",
464 note = "please use `SingleRowListArrayBuilder` instead"
465)]
466pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
467 SingleRowListArrayBuilder::new(arr)
468 .with_nullable(nullable)
469 .build_list_array()
470}
471
472#[deprecated(
473 since = "44.0.0",
474 note = "please use `SingleRowListArrayBuilder` instead"
475)]
476pub fn array_into_list_array_with_field_name(
477 arr: ArrayRef,
478 nullable: bool,
479 field_name: &str,
480) -> ListArray {
481 SingleRowListArrayBuilder::new(arr)
482 .with_nullable(nullable)
483 .with_field_name(Some(field_name.to_string()))
484 .build_list_array()
485}
486
487#[deprecated(
490 since = "44.0.0",
491 note = "please use `SingleRowListArrayBuilder` instead"
492)]
493pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
494 SingleRowListArrayBuilder::new(arr).build_large_list_array()
495}
496
497#[deprecated(
498 since = "44.0.0",
499 note = "please use `SingleRowListArrayBuilder` instead"
500)]
501pub fn array_into_large_list_array_with_field_name(
502 arr: ArrayRef,
503 field_name: &str,
504) -> LargeListArray {
505 SingleRowListArrayBuilder::new(arr)
506 .with_field_name(Some(field_name.to_string()))
507 .build_large_list_array()
508}
509
510#[deprecated(
511 since = "44.0.0",
512 note = "please use `SingleRowListArrayBuilder` instead"
513)]
514pub fn array_into_fixed_size_list_array(
515 arr: ArrayRef,
516 list_size: usize,
517) -> FixedSizeListArray {
518 SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size)
519}
520
521#[deprecated(
522 since = "44.0.0",
523 note = "please use `SingleRowListArrayBuilder` instead"
524)]
525pub fn array_into_fixed_size_list_array_with_field_name(
526 arr: ArrayRef,
527 list_size: usize,
528 field_name: &str,
529) -> FixedSizeListArray {
530 SingleRowListArrayBuilder::new(arr)
531 .with_field_name(Some(field_name.to_string()))
532 .build_fixed_size_list_array(list_size)
533}
534
535pub fn arrays_into_list_array(
557 arr: impl IntoIterator<Item = ArrayRef>,
558) -> Result<ListArray> {
559 let arr = arr.into_iter().collect::<Vec<_>>();
560 if arr.is_empty() {
561 return _internal_err!("Cannot wrap empty array into list array");
562 }
563
564 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
565 let data_type = arr[0].data_type().to_owned();
567 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
568 Ok(ListArray::new(
569 Arc::new(Field::new_list_field(data_type, true)),
570 OffsetBuffer::from_lengths(lens),
571 arrow::compute::concat(values.as_slice())?,
572 None,
573 ))
574}
575
576pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
578 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
579}
580
581pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
583 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
584}
585
586pub fn base_type(data_type: &DataType) -> DataType {
601 match data_type {
602 DataType::List(field)
603 | DataType::LargeList(field)
604 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
605 _ => data_type.to_owned(),
606 }
607}
608
609#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
611pub enum ListCoercion {
612 FixedSizedListToList,
614}
615
616pub fn coerced_type_with_base_type_only(
629 data_type: &DataType,
630 base_type: &DataType,
631 array_coercion: Option<&ListCoercion>,
632) -> DataType {
633 match (data_type, array_coercion) {
634 (DataType::List(field), _)
635 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
636 {
637 let field_type = coerced_type_with_base_type_only(
638 field.data_type(),
639 base_type,
640 array_coercion,
641 );
642
643 DataType::List(Arc::new(Field::new(
644 field.name(),
645 field_type,
646 field.is_nullable(),
647 )))
648 }
649 (DataType::FixedSizeList(field, len), _) => {
650 let field_type = coerced_type_with_base_type_only(
651 field.data_type(),
652 base_type,
653 array_coercion,
654 );
655
656 DataType::FixedSizeList(
657 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
658 *len,
659 )
660 }
661 (DataType::LargeList(field), _) => {
662 let field_type = coerced_type_with_base_type_only(
663 field.data_type(),
664 base_type,
665 array_coercion,
666 );
667
668 DataType::LargeList(Arc::new(Field::new(
669 field.name(),
670 field_type,
671 field.is_nullable(),
672 )))
673 }
674
675 _ => base_type.clone(),
676 }
677}
678
679pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
681 match data_type {
682 DataType::List(field) | DataType::FixedSizeList(field, _) => {
683 let field_type = coerced_fixed_size_list_to_list(field.data_type());
684
685 DataType::List(Arc::new(Field::new(
686 field.name(),
687 field_type,
688 field.is_nullable(),
689 )))
690 }
691 DataType::LargeList(field) => {
692 let field_type = coerced_fixed_size_list_to_list(field.data_type());
693
694 DataType::LargeList(Arc::new(Field::new(
695 field.name(),
696 field_type,
697 field.is_nullable(),
698 )))
699 }
700
701 _ => data_type.clone(),
702 }
703}
704
705pub fn list_ndims(data_type: &DataType) -> u64 {
707 match data_type {
708 DataType::List(field)
709 | DataType::LargeList(field)
710 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
711 _ => 0,
712 }
713}
714
715pub mod datafusion_strsim {
717 use std::cmp::min;
720 use std::str::Chars;
721
722 struct StringWrapper<'a>(&'a str);
723
724 impl<'b> IntoIterator for &StringWrapper<'b> {
725 type Item = char;
726 type IntoIter = Chars<'b>;
727
728 fn into_iter(self) -> Self::IntoIter {
729 self.0.chars()
730 }
731 }
732
733 fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
736 a: &'a Iter1,
737 b: &'b Iter2,
738 ) -> usize
739 where
740 &'a Iter1: IntoIterator<Item = Elem1>,
741 &'b Iter2: IntoIterator<Item = Elem2>,
742 Elem1: PartialEq<Elem2>,
743 {
744 let b_len = b.into_iter().count();
745
746 if a.into_iter().next().is_none() {
747 return b_len;
748 }
749
750 let mut cache: Vec<usize> = (1..b_len + 1).collect();
751
752 let mut result = 0;
753
754 for (i, a_elem) in a.into_iter().enumerate() {
755 result = i + 1;
756 let mut distance_b = i;
757
758 for (j, b_elem) in b.into_iter().enumerate() {
759 let cost = if a_elem == b_elem { 0usize } else { 1usize };
760 let distance_a = distance_b + cost;
761 distance_b = cache[j];
762 result = min(result + 1, min(distance_a, distance_b + 1));
763 cache[j] = result;
764 }
765 }
766
767 result
768 }
769
770 pub fn levenshtein(a: &str, b: &str) -> usize {
779 generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
780 }
781
782 pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
796 if a.is_empty() && b.is_empty() {
797 return 1.0;
798 }
799 1.0 - (levenshtein(a, b) as f64)
800 / (a.chars().count().max(b.chars().count()) as f64)
801 }
802}
803
804pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
807 first: impl IntoIterator<Item = T>,
808 second: impl IntoIterator<Item = S>,
809) -> Vec<usize> {
810 let mut result: Vec<_> = first
811 .into_iter()
812 .map(|e| *e.borrow())
813 .chain(second.into_iter().map(|e| *e.borrow()))
814 .collect::<HashSet<_>>()
815 .into_iter()
816 .collect();
817 result.sort();
818 result
819}
820
821pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
824 first: impl IntoIterator<Item = T>,
825 second: impl IntoIterator<Item = S>,
826) -> Vec<usize> {
827 let set: HashSet<_> = second.into_iter().map(|e| *e.borrow()).collect();
828 first
829 .into_iter()
830 .map(|e| *e.borrow())
831 .filter(|e| !set.contains(e))
832 .collect()
833}
834
835#[deprecated(since = "45.0.0", note = "Use std::Iterator::is_sorted instead")]
837pub fn is_sorted<T: Borrow<usize>>(sequence: impl IntoIterator<Item = T>) -> bool {
838 let mut previous = 0;
840 for item in sequence.into_iter() {
841 let current = *item.borrow();
842 if current < previous {
843 return false;
844 }
845 previous = current;
846 }
847 true
848}
849
850pub fn find_indices<T: PartialEq, S: Borrow<T>>(
853 items: &[T],
854 targets: impl IntoIterator<Item = S>,
855) -> Result<Vec<usize>> {
856 targets
857 .into_iter()
858 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
859 .collect::<Option<_>>()
860 .ok_or_else(|| DataFusionError::Execution("Target not found".to_string()))
861}
862
863pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
865 match original.as_slice() {
866 [] => vec![],
867 [first, ..] => {
868 let mut result = (0..first.len()).map(|_| vec![]).collect::<Vec<_>>();
869 for row in original {
870 for (item, transposed_row) in row.into_iter().zip(&mut result) {
871 transposed_row.push(item);
872 }
873 }
874 result
875 }
876 }
877}
878
879pub fn combine_limit(
923 parent_skip: usize,
924 parent_fetch: Option<usize>,
925 child_skip: usize,
926 child_fetch: Option<usize>,
927) -> (usize, Option<usize>) {
928 let combined_skip = child_skip.saturating_add(parent_skip);
929
930 let combined_fetch = match (parent_fetch, child_fetch) {
931 (Some(parent_fetch), Some(child_fetch)) => {
932 Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
933 }
934 (Some(parent_fetch), None) => Some(parent_fetch),
935 (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
936 (None, None) => None,
937 };
938
939 (combined_skip, combined_fetch)
940}
941
942pub fn get_available_parallelism() -> usize {
947 available_parallelism()
948 .unwrap_or(NonZero::new(1).expect("literal value `1` shouldn't be zero"))
949 .get()
950}
951
952pub fn take_function_args<const N: usize, T>(
976 function_name: &str,
977 args: impl IntoIterator<Item = T>,
978) -> Result<[T; N]> {
979 let args = args.into_iter().collect::<Vec<_>>();
980 args.try_into().map_err(|v: Vec<T>| {
981 _exec_datafusion_err!(
982 "{} function requires {} {}, got {}",
983 function_name,
984 N,
985 if N == 1 { "argument" } else { "arguments" },
986 v.len()
987 )
988 })
989}
990
991#[cfg(test)]
992mod tests {
993 use super::*;
994 use crate::ScalarValue::Null;
995 use arrow::array::Float64Array;
996 use sqlparser::tokenizer::Span;
997
998 #[test]
999 fn test_bisect_linear_left_and_right() -> Result<()> {
1000 let arrays: Vec<ArrayRef> = vec![
1001 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
1002 Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
1003 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
1004 Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
1005 ];
1006 let search_tuple: Vec<ScalarValue> = vec![
1007 ScalarValue::Float64(Some(8.0)),
1008 ScalarValue::Float64(Some(3.0)),
1009 ScalarValue::Float64(Some(8.0)),
1010 ScalarValue::Float64(Some(8.0)),
1011 ];
1012 let ords = [
1013 SortOptions {
1014 descending: false,
1015 nulls_first: true,
1016 },
1017 SortOptions {
1018 descending: false,
1019 nulls_first: true,
1020 },
1021 SortOptions {
1022 descending: false,
1023 nulls_first: true,
1024 },
1025 SortOptions {
1026 descending: true,
1027 nulls_first: true,
1028 },
1029 ];
1030 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1031 assert_eq!(res, 2);
1032 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1033 assert_eq!(res, 3);
1034 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1035 assert_eq!(res, 2);
1036 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1037 assert_eq!(res, 3);
1038 Ok(())
1039 }
1040
1041 #[test]
1042 fn vector_ord() {
1043 assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
1044 assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
1045 assert!(
1046 vec![
1047 ScalarValue::Int32(Some(2)),
1048 Null,
1049 ScalarValue::Int32(Some(0)),
1050 ] < vec![
1051 ScalarValue::Int32(Some(2)),
1052 Null,
1053 ScalarValue::Int32(Some(1)),
1054 ]
1055 );
1056 assert!(
1057 vec![
1058 ScalarValue::Int32(Some(2)),
1059 ScalarValue::Int32(None),
1060 ScalarValue::Int32(Some(0)),
1061 ] < vec![
1062 ScalarValue::Int32(Some(2)),
1063 ScalarValue::Int32(None),
1064 ScalarValue::Int32(Some(1)),
1065 ]
1066 );
1067 }
1068
1069 #[test]
1070 fn ord_same_type() {
1071 assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
1072 }
1073
1074 #[test]
1075 fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
1076 let arrays: Vec<ArrayRef> =
1078 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1079 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1080 let ords = [SortOptions {
1081 descending: true,
1082 nulls_first: true,
1083 }];
1084 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1085 assert_eq!(res, 0);
1086 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1087 assert_eq!(res, 0);
1088
1089 let arrays: Vec<ArrayRef> =
1091 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1092 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1093 let ords = [SortOptions {
1094 descending: true,
1095 nulls_first: true,
1096 }];
1097 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1098 assert_eq!(res, 1);
1099 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1100 assert_eq!(res, 1);
1101
1102 let arrays: Vec<ArrayRef> =
1104 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1105 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1106 let ords = [SortOptions {
1107 descending: false,
1108 nulls_first: true,
1109 }];
1110 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1111 assert_eq!(res, 1);
1112 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1113 assert_eq!(res, 1);
1114
1115 let arrays: Vec<ArrayRef> =
1117 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1118 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1119 let ords = [SortOptions {
1120 descending: false,
1121 nulls_first: true,
1122 }];
1123 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1124 assert_eq!(res, 2);
1125 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1126 assert_eq!(res, 2);
1127
1128 let arrays: Vec<ArrayRef> = vec![
1129 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
1130 Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
1131 ];
1132 let search_tuple: Vec<ScalarValue> = vec![
1133 ScalarValue::Float64(Some(8.0)),
1134 ScalarValue::Float64(Some(8.0)),
1135 ];
1136 let ords = [
1137 SortOptions {
1138 descending: false,
1139 nulls_first: true,
1140 },
1141 SortOptions {
1142 descending: true,
1143 nulls_first: true,
1144 },
1145 ];
1146 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1147 assert_eq!(res, 3);
1148 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1149 assert_eq!(res, 3);
1150
1151 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1152 assert_eq!(res, 2);
1153 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1154 assert_eq!(res, 2);
1155 Ok(())
1156 }
1157
1158 #[test]
1159 fn test_evaluate_partition_ranges() -> Result<()> {
1160 let arrays: Vec<ArrayRef> = vec![
1161 Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
1162 Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
1163 ];
1164 let n_row = arrays[0].len();
1165 let options: Vec<SortOptions> = vec![
1166 SortOptions {
1167 descending: false,
1168 nulls_first: false,
1169 },
1170 SortOptions {
1171 descending: true,
1172 nulls_first: false,
1173 },
1174 ];
1175 let sort_columns = arrays
1176 .into_iter()
1177 .zip(options)
1178 .map(|(values, options)| SortColumn {
1179 values,
1180 options: Some(options),
1181 })
1182 .collect::<Vec<_>>();
1183 let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
1184 assert_eq!(ranges.len(), 4);
1185 assert_eq!(ranges[0], Range { start: 0, end: 2 });
1186 assert_eq!(ranges[1], Range { start: 2, end: 3 });
1187 assert_eq!(ranges[2], Range { start: 3, end: 4 });
1188 assert_eq!(ranges[3], Range { start: 4, end: 6 });
1189 Ok(())
1190 }
1191
1192 #[test]
1193 fn test_quote_identifier() -> Result<()> {
1194 let cases = vec![
1195 ("foo", r#"foo"#),
1196 ("_foo", r#"_foo"#),
1197 ("foo_bar", r#"foo_bar"#),
1198 ("foo-bar", r#""foo-bar""#),
1199 ("foo.bar", r#""foo.bar""#),
1201 ("Foo", r#""Foo""#),
1202 ("Foo.Bar", r#""Foo.Bar""#),
1203 ("test1", r#"test1"#),
1205 ("1test", r#""1test""#),
1206 ];
1207
1208 for (identifier, quoted_identifier) in cases {
1209 println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");
1210
1211 assert_eq!(quote_identifier(identifier), quoted_identifier);
1212
1213 let quote_style = if quoted_identifier.starts_with('"') {
1216 Some('"')
1217 } else {
1218 None
1219 };
1220
1221 let expected_parsed = vec![Ident {
1222 value: identifier.to_string(),
1223 quote_style,
1224 span: Span::empty(),
1225 }];
1226
1227 assert_eq!(
1228 parse_identifiers(quoted_identifier).unwrap(),
1229 expected_parsed
1230 );
1231 }
1232
1233 Ok(())
1234 }
1235
1236 #[test]
1237 fn test_get_at_indices() -> Result<()> {
1238 let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
1239 assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
1240 assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
1241 assert!(get_at_indices(&in_vec, [7]).is_err());
1243 Ok(())
1244 }
1245
1246 #[test]
1247 fn test_longest_consecutive_prefix() {
1248 assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
1249 assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
1250 assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
1251 assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
1252 }
1253
1254 #[test]
1255 fn test_merge_and_order_indices() {
1256 assert_eq!(
1257 merge_and_order_indices([0, 3, 4], [1, 3, 5]),
1258 vec![0, 1, 3, 4, 5]
1259 );
1260 assert_eq!(
1262 merge_and_order_indices([3, 0, 4], [5, 1, 3]),
1263 vec![0, 1, 3, 4, 5]
1264 );
1265 }
1266
1267 #[test]
1268 fn test_set_difference() {
1269 assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
1270 assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
1271 assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
1273 assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
1274 assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
1275 }
1276
1277 #[test]
1278 #[expect(deprecated)]
1279 fn test_is_sorted() {
1280 assert!(is_sorted::<usize>([]));
1281 assert!(is_sorted([0]));
1282 assert!(is_sorted([0, 3, 4]));
1283 assert!(is_sorted([0, 1, 2]));
1284 assert!(is_sorted([0, 1, 4]));
1285 assert!(is_sorted([0usize; 0]));
1286 assert!(is_sorted([1, 2]));
1287 assert!(!is_sorted([3, 2]));
1288 }
1289
1290 #[test]
1291 fn test_find_indices() -> Result<()> {
1292 assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
1293 assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
1294 assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
1295 assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
1296 assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
1297 Ok(())
1298 }
1299
1300 #[test]
1301 fn test_transpose() -> Result<()> {
1302 let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
1303 let transposed = transpose(in_data);
1304 let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
1305 assert_eq!(expected, transposed);
1306 Ok(())
1307 }
1308}