1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_err};
26use crate::{DataFusionError, Result, ScalarValue};
27use arrow::array::{
28 cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29 OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34use sqlparser::ast::Ident;
35use sqlparser::dialect::GenericDialect;
36use sqlparser::parser::Parser;
37use std::borrow::{Borrow, Cow};
38use std::cmp::{min, Ordering};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45pub fn project_schema(
75 schema: &SchemaRef,
76 projection: Option<&Vec<usize>>,
77) -> Result<SchemaRef> {
78 let schema = match projection {
79 Some(columns) => Arc::new(schema.project(columns)?),
80 None => Arc::clone(schema),
81 };
82 Ok(schema)
83}
84
85pub fn extract_row_at_idx_to_buf(
87 columns: &[ArrayRef],
88 idx: usize,
89 buf: &mut Vec<ScalarValue>,
90) -> Result<()> {
91 buf.clear();
92
93 let iter = columns
94 .iter()
95 .map(|arr| ScalarValue::try_from_array(arr, idx));
96 for v in iter.into_iter() {
97 buf.push(v?);
98 }
99
100 Ok(())
101}
102pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
104 columns
105 .iter()
106 .map(|arr| ScalarValue::try_from_array(arr, idx))
107 .collect()
108}
109
110pub fn compare_rows(
112 x: &[ScalarValue],
113 y: &[ScalarValue],
114 sort_options: &[SortOptions],
115) -> Result<Ordering> {
116 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
117 for ((lhs, rhs), sort_options) in zip_it {
119 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
121 (true, false, false) | (false, true, true) => Ordering::Greater,
122 (true, false, true) | (false, true, false) => Ordering::Less,
123 (false, false, _) => {
124 if sort_options.descending {
125 rhs.try_cmp(lhs)?
126 } else {
127 lhs.try_cmp(rhs)?
128 }
129 }
130 (true, true, _) => continue,
131 };
132 if result != Ordering::Equal {
133 return Ok(result);
134 }
135 }
136 Ok(Ordering::Equal)
137}
138
139pub fn bisect<const SIDE: bool>(
144 item_columns: &[ArrayRef],
145 target: &[ScalarValue],
146 sort_options: &[SortOptions],
147) -> Result<usize> {
148 let low: usize = 0;
149 let high: usize = item_columns
150 .first()
151 .ok_or_else(|| {
152 DataFusionError::Internal("Column array shouldn't be empty".to_string())
153 })?
154 .len();
155 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
156 let cmp = compare_rows(current, target, sort_options)?;
157 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
158 };
159 find_bisect_point(item_columns, target, compare_fn, low, high)
160}
161
162pub fn find_bisect_point<F>(
169 item_columns: &[ArrayRef],
170 target: &[ScalarValue],
171 compare_fn: F,
172 mut low: usize,
173 mut high: usize,
174) -> Result<usize>
175where
176 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
177{
178 while low < high {
179 let mid = ((high - low) / 2) + low;
180 let val = get_row_at_idx(item_columns, mid)?;
181 if compare_fn(&val, target)? {
182 low = mid + 1;
183 } else {
184 high = mid;
185 }
186 }
187 Ok(low)
188}
189
190pub fn linear_search<const SIDE: bool>(
195 item_columns: &[ArrayRef],
196 target: &[ScalarValue],
197 sort_options: &[SortOptions],
198) -> Result<usize> {
199 let low: usize = 0;
200 let high: usize = item_columns
201 .first()
202 .ok_or_else(|| {
203 DataFusionError::Internal("Column array shouldn't be empty".to_string())
204 })?
205 .len();
206 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
207 let cmp = compare_rows(current, target, sort_options)?;
208 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
209 };
210 search_in_slice(item_columns, target, compare_fn, low, high)
211}
212
213pub fn search_in_slice<F>(
218 item_columns: &[ArrayRef],
219 target: &[ScalarValue],
220 compare_fn: F,
221 mut low: usize,
222 high: usize,
223) -> Result<usize>
224where
225 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
226{
227 while low < high {
228 let val = get_row_at_idx(item_columns, low)?;
229 if !compare_fn(&val, target)? {
230 break;
231 }
232 low += 1;
233 }
234 Ok(low)
235}
236
237pub fn evaluate_partition_ranges(
242 num_rows: usize,
243 partition_columns: &[SortColumn],
244) -> Result<Vec<Range<usize>>> {
245 Ok(if partition_columns.is_empty() {
246 vec![Range {
247 start: 0,
248 end: num_rows,
249 }]
250 } else {
251 let cols: Vec<_> = partition_columns
252 .iter()
253 .map(|x| Arc::clone(&x.values))
254 .collect();
255 partition(&cols)?.ranges()
256 })
257}
258
259pub fn quote_identifier(s: &str) -> Cow<'_, str> {
264 if needs_quotes(s) {
265 Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
266 } else {
267 Cow::Borrowed(s)
268 }
269}
270
271fn needs_quotes(s: &str) -> bool {
273 let mut chars = s.chars();
274
275 if let Some(first_char) = chars.next() {
277 if !(first_char.is_ascii_lowercase() || first_char == '_') {
278 return true;
279 }
280 }
281
282 !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
283}
284
285pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
286 let dialect = GenericDialect;
287 let mut parser = Parser::new(&dialect).try_with_sql(s)?;
288 let idents = parser.parse_multipart_identifier()?;
289 Ok(idents)
290}
291
292pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
293 parse_identifiers(s)
294 .unwrap_or_default()
295 .into_iter()
296 .map(|id| match id.quote_style {
297 Some(_) => id.value,
298 None if ignore_case => id.value,
299 _ => id.value.to_ascii_lowercase(),
300 })
301 .collect::<Vec<_>>()
302}
303
304pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
306 items: &[T],
307 indices: impl IntoIterator<Item = I>,
308) -> Result<Vec<T>> {
309 indices
310 .into_iter()
311 .map(|idx| items.get(*idx.borrow()).cloned())
312 .collect::<Option<Vec<T>>>()
313 .ok_or_else(|| {
314 DataFusionError::Execution(
315 "Expects indices to be in the range of searched vector".to_string(),
316 )
317 })
318}
319
320pub fn longest_consecutive_prefix<T: Borrow<usize>>(
326 sequence: impl IntoIterator<Item = T>,
327) -> usize {
328 let mut count = 0;
329 for item in sequence {
330 if !count.eq(item.borrow()) {
331 break;
332 }
333 count += 1;
334 }
335 count
336}
337
338#[derive(Debug, Clone)]
358pub struct SingleRowListArrayBuilder {
359 arr: ArrayRef,
361 nullable: bool,
363 field_name: Option<String>,
366}
367
368impl SingleRowListArrayBuilder {
369 pub fn new(arr: ArrayRef) -> Self {
371 Self {
372 arr,
373 nullable: true,
374 field_name: None,
375 }
376 }
377
378 pub fn with_nullable(mut self, nullable: bool) -> Self {
380 self.nullable = nullable;
381 self
382 }
383
384 pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
386 self.field_name = field_name;
387 self
388 }
389
390 pub fn with_field(self, field: &Field) -> Self {
392 self.with_field_name(Some(field.name().to_owned()))
393 .with_nullable(field.is_nullable())
394 }
395
396 pub fn build_list_array(self) -> ListArray {
398 let (field, arr) = self.into_field_and_arr();
399 let offsets = OffsetBuffer::from_lengths([arr.len()]);
400 ListArray::new(field, offsets, arr, None)
401 }
402
403 pub fn build_list_scalar(self) -> ScalarValue {
405 ScalarValue::List(Arc::new(self.build_list_array()))
406 }
407
408 pub fn build_large_list_array(self) -> LargeListArray {
410 let (field, arr) = self.into_field_and_arr();
411 let offsets = OffsetBuffer::from_lengths([arr.len()]);
412 LargeListArray::new(field, offsets, arr, None)
413 }
414
415 pub fn build_large_list_scalar(self) -> ScalarValue {
417 ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
418 }
419
420 pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
422 let (field, arr) = self.into_field_and_arr();
423 FixedSizeListArray::new(field, list_size as i32, arr, None)
424 }
425
426 pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
428 ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
429 }
430
431 fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
433 let Self {
434 arr,
435 nullable,
436 field_name,
437 } = self;
438 let data_type = arr.data_type().to_owned();
439 let field = match field_name {
440 Some(name) => Field::new(name, data_type, nullable),
441 None => Field::new_list_field(data_type, nullable),
442 };
443 (Arc::new(field), arr)
444 }
445}
446
447pub fn arrays_into_list_array(
469 arr: impl IntoIterator<Item = ArrayRef>,
470) -> Result<ListArray> {
471 let arr = arr.into_iter().collect::<Vec<_>>();
472 if arr.is_empty() {
473 return _internal_err!("Cannot wrap empty array into list array");
474 }
475
476 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
477 let data_type = arr[0].data_type().to_owned();
479 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
480 Ok(ListArray::new(
481 Arc::new(Field::new_list_field(data_type, true)),
482 OffsetBuffer::from_lengths(lens),
483 arrow::compute::concat(values.as_slice())?,
484 None,
485 ))
486}
487
488pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
490 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
491}
492
493pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
495 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
496}
497
498pub fn base_type(data_type: &DataType) -> DataType {
513 match data_type {
514 DataType::List(field)
515 | DataType::LargeList(field)
516 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
517 _ => data_type.to_owned(),
518 }
519}
520
521#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
523pub enum ListCoercion {
524 FixedSizedListToList,
526}
527
528pub fn coerced_type_with_base_type_only(
541 data_type: &DataType,
542 base_type: &DataType,
543 array_coercion: Option<&ListCoercion>,
544) -> DataType {
545 match (data_type, array_coercion) {
546 (DataType::List(field), _)
547 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
548 {
549 let field_type = coerced_type_with_base_type_only(
550 field.data_type(),
551 base_type,
552 array_coercion,
553 );
554
555 DataType::List(Arc::new(Field::new(
556 field.name(),
557 field_type,
558 field.is_nullable(),
559 )))
560 }
561 (DataType::FixedSizeList(field, len), _) => {
562 let field_type = coerced_type_with_base_type_only(
563 field.data_type(),
564 base_type,
565 array_coercion,
566 );
567
568 DataType::FixedSizeList(
569 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
570 *len,
571 )
572 }
573 (DataType::LargeList(field), _) => {
574 let field_type = coerced_type_with_base_type_only(
575 field.data_type(),
576 base_type,
577 array_coercion,
578 );
579
580 DataType::LargeList(Arc::new(Field::new(
581 field.name(),
582 field_type,
583 field.is_nullable(),
584 )))
585 }
586
587 _ => base_type.clone(),
588 }
589}
590
591pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
593 match data_type {
594 DataType::List(field) | DataType::FixedSizeList(field, _) => {
595 let field_type = coerced_fixed_size_list_to_list(field.data_type());
596
597 DataType::List(Arc::new(Field::new(
598 field.name(),
599 field_type,
600 field.is_nullable(),
601 )))
602 }
603 DataType::LargeList(field) => {
604 let field_type = coerced_fixed_size_list_to_list(field.data_type());
605
606 DataType::LargeList(Arc::new(Field::new(
607 field.name(),
608 field_type,
609 field.is_nullable(),
610 )))
611 }
612
613 _ => data_type.clone(),
614 }
615}
616
617pub fn list_ndims(data_type: &DataType) -> u64 {
619 match data_type {
620 DataType::List(field)
621 | DataType::LargeList(field)
622 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
623 _ => 0,
624 }
625}
626
627pub mod datafusion_strsim {
629 use std::cmp::min;
632 use std::str::Chars;
633
634 struct StringWrapper<'a>(&'a str);
635
636 impl<'b> IntoIterator for &StringWrapper<'b> {
637 type Item = char;
638 type IntoIter = Chars<'b>;
639
640 fn into_iter(self) -> Self::IntoIter {
641 self.0.chars()
642 }
643 }
644
645 fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
648 a: &'a Iter1,
649 b: &'b Iter2,
650 ) -> usize
651 where
652 &'a Iter1: IntoIterator<Item = Elem1>,
653 &'b Iter2: IntoIterator<Item = Elem2>,
654 Elem1: PartialEq<Elem2>,
655 {
656 let b_len = b.into_iter().count();
657
658 if a.into_iter().next().is_none() {
659 return b_len;
660 }
661
662 let mut cache: Vec<usize> = (1..b_len + 1).collect();
663
664 let mut result = 0;
665
666 for (i, a_elem) in a.into_iter().enumerate() {
667 result = i + 1;
668 let mut distance_b = i;
669
670 for (j, b_elem) in b.into_iter().enumerate() {
671 let cost = if a_elem == b_elem { 0usize } else { 1usize };
672 let distance_a = distance_b + cost;
673 distance_b = cache[j];
674 result = min(result + 1, min(distance_a, distance_b + 1));
675 cache[j] = result;
676 }
677 }
678
679 result
680 }
681
682 pub fn levenshtein(a: &str, b: &str) -> usize {
691 generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
692 }
693
694 pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
708 if a.is_empty() && b.is_empty() {
709 return 1.0;
710 }
711 1.0 - (levenshtein(a, b) as f64)
712 / (a.chars().count().max(b.chars().count()) as f64)
713 }
714}
715
716pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
719 first: impl IntoIterator<Item = T>,
720 second: impl IntoIterator<Item = S>,
721) -> Vec<usize> {
722 let mut result: Vec<_> = first
723 .into_iter()
724 .map(|e| *e.borrow())
725 .chain(second.into_iter().map(|e| *e.borrow()))
726 .collect::<HashSet<_>>()
727 .into_iter()
728 .collect();
729 result.sort();
730 result
731}
732
733pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
736 first: impl IntoIterator<Item = T>,
737 second: impl IntoIterator<Item = S>,
738) -> Vec<usize> {
739 let set: HashSet<_> = second.into_iter().map(|e| *e.borrow()).collect();
740 first
741 .into_iter()
742 .map(|e| *e.borrow())
743 .filter(|e| !set.contains(e))
744 .collect()
745}
746
747pub fn find_indices<T: PartialEq, S: Borrow<T>>(
750 items: &[T],
751 targets: impl IntoIterator<Item = S>,
752) -> Result<Vec<usize>> {
753 targets
754 .into_iter()
755 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
756 .collect::<Option<_>>()
757 .ok_or_else(|| DataFusionError::Execution("Target not found".to_string()))
758}
759
760pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
762 match original.as_slice() {
763 [] => vec![],
764 [first, ..] => {
765 let mut result = (0..first.len()).map(|_| vec![]).collect::<Vec<_>>();
766 for row in original {
767 for (item, transposed_row) in row.into_iter().zip(&mut result) {
768 transposed_row.push(item);
769 }
770 }
771 result
772 }
773 }
774}
775
776pub fn combine_limit(
820 parent_skip: usize,
821 parent_fetch: Option<usize>,
822 child_skip: usize,
823 child_fetch: Option<usize>,
824) -> (usize, Option<usize>) {
825 let combined_skip = child_skip.saturating_add(parent_skip);
826
827 let combined_fetch = match (parent_fetch, child_fetch) {
828 (Some(parent_fetch), Some(child_fetch)) => {
829 Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
830 }
831 (Some(parent_fetch), None) => Some(parent_fetch),
832 (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
833 (None, None) => None,
834 };
835
836 (combined_skip, combined_fetch)
837}
838
839pub fn get_available_parallelism() -> usize {
844 available_parallelism()
845 .unwrap_or(NonZero::new(1).expect("literal value `1` shouldn't be zero"))
846 .get()
847}
848
849pub fn take_function_args<const N: usize, T>(
873 function_name: &str,
874 args: impl IntoIterator<Item = T>,
875) -> Result<[T; N]> {
876 let args = args.into_iter().collect::<Vec<_>>();
877 args.try_into().map_err(|v: Vec<T>| {
878 _exec_datafusion_err!(
879 "{} function requires {} {}, got {}",
880 function_name,
881 N,
882 if N == 1 { "argument" } else { "arguments" },
883 v.len()
884 )
885 })
886}
887
888#[cfg(test)]
889mod tests {
890 use super::*;
891 use crate::ScalarValue::Null;
892 use arrow::array::Float64Array;
893 use sqlparser::tokenizer::Span;
894
895 #[test]
896 fn test_bisect_linear_left_and_right() -> Result<()> {
897 let arrays: Vec<ArrayRef> = vec![
898 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
899 Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
900 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
901 Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
902 ];
903 let search_tuple: Vec<ScalarValue> = vec![
904 ScalarValue::Float64(Some(8.0)),
905 ScalarValue::Float64(Some(3.0)),
906 ScalarValue::Float64(Some(8.0)),
907 ScalarValue::Float64(Some(8.0)),
908 ];
909 let ords = [
910 SortOptions {
911 descending: false,
912 nulls_first: true,
913 },
914 SortOptions {
915 descending: false,
916 nulls_first: true,
917 },
918 SortOptions {
919 descending: false,
920 nulls_first: true,
921 },
922 SortOptions {
923 descending: true,
924 nulls_first: true,
925 },
926 ];
927 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
928 assert_eq!(res, 2);
929 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
930 assert_eq!(res, 3);
931 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
932 assert_eq!(res, 2);
933 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
934 assert_eq!(res, 3);
935 Ok(())
936 }
937
938 #[test]
939 fn vector_ord() {
940 assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
941 assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
942 assert!(
943 vec![
944 ScalarValue::Int32(Some(2)),
945 Null,
946 ScalarValue::Int32(Some(0)),
947 ] < vec![
948 ScalarValue::Int32(Some(2)),
949 Null,
950 ScalarValue::Int32(Some(1)),
951 ]
952 );
953 assert!(
954 vec![
955 ScalarValue::Int32(Some(2)),
956 ScalarValue::Int32(None),
957 ScalarValue::Int32(Some(0)),
958 ] < vec![
959 ScalarValue::Int32(Some(2)),
960 ScalarValue::Int32(None),
961 ScalarValue::Int32(Some(1)),
962 ]
963 );
964 }
965
966 #[test]
967 fn ord_same_type() {
968 assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
969 }
970
971 #[test]
972 fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
973 let arrays: Vec<ArrayRef> =
975 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
976 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
977 let ords = [SortOptions {
978 descending: true,
979 nulls_first: true,
980 }];
981 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
982 assert_eq!(res, 0);
983 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
984 assert_eq!(res, 0);
985
986 let arrays: Vec<ArrayRef> =
988 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
989 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
990 let ords = [SortOptions {
991 descending: true,
992 nulls_first: true,
993 }];
994 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
995 assert_eq!(res, 1);
996 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
997 assert_eq!(res, 1);
998
999 let arrays: Vec<ArrayRef> =
1001 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1002 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1003 let ords = [SortOptions {
1004 descending: false,
1005 nulls_first: true,
1006 }];
1007 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1008 assert_eq!(res, 1);
1009 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1010 assert_eq!(res, 1);
1011
1012 let arrays: Vec<ArrayRef> =
1014 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1015 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1016 let ords = [SortOptions {
1017 descending: false,
1018 nulls_first: true,
1019 }];
1020 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1021 assert_eq!(res, 2);
1022 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1023 assert_eq!(res, 2);
1024
1025 let arrays: Vec<ArrayRef> = vec![
1026 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
1027 Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
1028 ];
1029 let search_tuple: Vec<ScalarValue> = vec![
1030 ScalarValue::Float64(Some(8.0)),
1031 ScalarValue::Float64(Some(8.0)),
1032 ];
1033 let ords = [
1034 SortOptions {
1035 descending: false,
1036 nulls_first: true,
1037 },
1038 SortOptions {
1039 descending: true,
1040 nulls_first: true,
1041 },
1042 ];
1043 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1044 assert_eq!(res, 3);
1045 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1046 assert_eq!(res, 3);
1047
1048 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1049 assert_eq!(res, 2);
1050 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1051 assert_eq!(res, 2);
1052 Ok(())
1053 }
1054
1055 #[test]
1056 fn test_evaluate_partition_ranges() -> Result<()> {
1057 let arrays: Vec<ArrayRef> = vec![
1058 Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
1059 Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
1060 ];
1061 let n_row = arrays[0].len();
1062 let options: Vec<SortOptions> = vec![
1063 SortOptions {
1064 descending: false,
1065 nulls_first: false,
1066 },
1067 SortOptions {
1068 descending: true,
1069 nulls_first: false,
1070 },
1071 ];
1072 let sort_columns = arrays
1073 .into_iter()
1074 .zip(options)
1075 .map(|(values, options)| SortColumn {
1076 values,
1077 options: Some(options),
1078 })
1079 .collect::<Vec<_>>();
1080 let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
1081 assert_eq!(ranges.len(), 4);
1082 assert_eq!(ranges[0], Range { start: 0, end: 2 });
1083 assert_eq!(ranges[1], Range { start: 2, end: 3 });
1084 assert_eq!(ranges[2], Range { start: 3, end: 4 });
1085 assert_eq!(ranges[3], Range { start: 4, end: 6 });
1086 Ok(())
1087 }
1088
1089 #[test]
1090 fn test_quote_identifier() -> Result<()> {
1091 let cases = vec![
1092 ("foo", r#"foo"#),
1093 ("_foo", r#"_foo"#),
1094 ("foo_bar", r#"foo_bar"#),
1095 ("foo-bar", r#""foo-bar""#),
1096 ("foo.bar", r#""foo.bar""#),
1098 ("Foo", r#""Foo""#),
1099 ("Foo.Bar", r#""Foo.Bar""#),
1100 ("test1", r#"test1"#),
1102 ("1test", r#""1test""#),
1103 ];
1104
1105 for (identifier, quoted_identifier) in cases {
1106 println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");
1107
1108 assert_eq!(quote_identifier(identifier), quoted_identifier);
1109
1110 let quote_style = if quoted_identifier.starts_with('"') {
1113 Some('"')
1114 } else {
1115 None
1116 };
1117
1118 let expected_parsed = vec![Ident {
1119 value: identifier.to_string(),
1120 quote_style,
1121 span: Span::empty(),
1122 }];
1123
1124 assert_eq!(
1125 parse_identifiers(quoted_identifier).unwrap(),
1126 expected_parsed
1127 );
1128 }
1129
1130 Ok(())
1131 }
1132
1133 #[test]
1134 fn test_get_at_indices() -> Result<()> {
1135 let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
1136 assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
1137 assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
1138 assert!(get_at_indices(&in_vec, [7]).is_err());
1140 Ok(())
1141 }
1142
1143 #[test]
1144 fn test_longest_consecutive_prefix() {
1145 assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
1146 assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
1147 assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
1148 assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
1149 }
1150
1151 #[test]
1152 fn test_merge_and_order_indices() {
1153 assert_eq!(
1154 merge_and_order_indices([0, 3, 4], [1, 3, 5]),
1155 vec![0, 1, 3, 4, 5]
1156 );
1157 assert_eq!(
1159 merge_and_order_indices([3, 0, 4], [5, 1, 3]),
1160 vec![0, 1, 3, 4, 5]
1161 );
1162 }
1163
1164 #[test]
1165 fn test_set_difference() {
1166 assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
1167 assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
1168 assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
1170 assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
1171 assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
1172 }
1173
1174 #[test]
1175 fn test_find_indices() -> Result<()> {
1176 assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
1177 assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
1178 assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
1179 assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
1180 assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
1181 Ok(())
1182 }
1183
1184 #[test]
1185 fn test_transpose() -> Result<()> {
1186 let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
1187 let transposed = transpose(in_data);
1188 let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
1189 assert_eq!(expected, transposed);
1190 Ok(())
1191 }
1192}