1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err};
26use crate::{Result, ScalarValue};
27use arrow::array::{
28 cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29 OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34#[cfg(feature = "sql")]
35use sqlparser::{ast::Ident, dialect::GenericDialect, parser::Parser};
36use std::borrow::{Borrow, Cow};
37use std::cmp::{min, Ordering};
38use std::collections::HashSet;
39use std::num::NonZero;
40use std::ops::Range;
41use std::sync::Arc;
42use std::thread::available_parallelism;
43
44pub fn project_schema(
71 schema: &SchemaRef,
72 projection: Option<&Vec<usize>>,
73) -> Result<SchemaRef> {
74 let schema = match projection {
75 Some(columns) => Arc::new(schema.project(columns)?),
76 None => Arc::clone(schema),
77 };
78 Ok(schema)
79}
80
81pub fn extract_row_at_idx_to_buf(
83 columns: &[ArrayRef],
84 idx: usize,
85 buf: &mut Vec<ScalarValue>,
86) -> Result<()> {
87 buf.clear();
88
89 let iter = columns
90 .iter()
91 .map(|arr| ScalarValue::try_from_array(arr, idx));
92 for v in iter.into_iter() {
93 buf.push(v?);
94 }
95
96 Ok(())
97}
98pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
100 columns
101 .iter()
102 .map(|arr| ScalarValue::try_from_array(arr, idx))
103 .collect()
104}
105
106pub fn compare_rows(
108 x: &[ScalarValue],
109 y: &[ScalarValue],
110 sort_options: &[SortOptions],
111) -> Result<Ordering> {
112 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
113 for ((lhs, rhs), sort_options) in zip_it {
115 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
117 (true, false, false) | (false, true, true) => Ordering::Greater,
118 (true, false, true) | (false, true, false) => Ordering::Less,
119 (false, false, _) => {
120 if sort_options.descending {
121 rhs.try_cmp(lhs)?
122 } else {
123 lhs.try_cmp(rhs)?
124 }
125 }
126 (true, true, _) => continue,
127 };
128 if result != Ordering::Equal {
129 return Ok(result);
130 }
131 }
132 Ok(Ordering::Equal)
133}
134
135pub fn bisect<const SIDE: bool>(
140 item_columns: &[ArrayRef],
141 target: &[ScalarValue],
142 sort_options: &[SortOptions],
143) -> Result<usize> {
144 let low: usize = 0;
145 let high: usize = item_columns
146 .first()
147 .ok_or_else(|| _internal_datafusion_err!("Column array shouldn't be empty"))?
148 .len();
149 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
150 let cmp = compare_rows(current, target, sort_options)?;
151 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
152 };
153 find_bisect_point(item_columns, target, compare_fn, low, high)
154}
155
156pub fn find_bisect_point<F>(
163 item_columns: &[ArrayRef],
164 target: &[ScalarValue],
165 compare_fn: F,
166 mut low: usize,
167 mut high: usize,
168) -> Result<usize>
169where
170 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
171{
172 while low < high {
173 let mid = ((high - low) / 2) + low;
174 let val = get_row_at_idx(item_columns, mid)?;
175 if compare_fn(&val, target)? {
176 low = mid + 1;
177 } else {
178 high = mid;
179 }
180 }
181 Ok(low)
182}
183
184pub fn linear_search<const SIDE: bool>(
189 item_columns: &[ArrayRef],
190 target: &[ScalarValue],
191 sort_options: &[SortOptions],
192) -> Result<usize> {
193 let low: usize = 0;
194 let high: usize = item_columns
195 .first()
196 .ok_or_else(|| _internal_datafusion_err!("Column array shouldn't be empty"))?
197 .len();
198 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
199 let cmp = compare_rows(current, target, sort_options)?;
200 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
201 };
202 search_in_slice(item_columns, target, compare_fn, low, high)
203}
204
205pub fn search_in_slice<F>(
210 item_columns: &[ArrayRef],
211 target: &[ScalarValue],
212 compare_fn: F,
213 mut low: usize,
214 high: usize,
215) -> Result<usize>
216where
217 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
218{
219 while low < high {
220 let val = get_row_at_idx(item_columns, low)?;
221 if !compare_fn(&val, target)? {
222 break;
223 }
224 low += 1;
225 }
226 Ok(low)
227}
228
229pub fn evaluate_partition_ranges(
234 num_rows: usize,
235 partition_columns: &[SortColumn],
236) -> Result<Vec<Range<usize>>> {
237 Ok(if partition_columns.is_empty() {
238 vec![Range {
239 start: 0,
240 end: num_rows,
241 }]
242 } else {
243 let cols: Vec<_> = partition_columns
244 .iter()
245 .map(|x| Arc::clone(&x.values))
246 .collect();
247 partition(&cols)?.ranges()
248 })
249}
250
251pub fn quote_identifier(s: &str) -> Cow<'_, str> {
256 if needs_quotes(s) {
257 Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
258 } else {
259 Cow::Borrowed(s)
260 }
261}
262
/// Returns true when `s` must be quoted to survive a round trip through the
/// SQL parser: it is quoted unless it starts with a lowercase letter or `_`
/// and continues with only lowercase letters, digits, or `_`.
/// The empty string is reported as not needing quotes.
fn needs_quotes(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // A bad first character settles it immediately.
        Some(first) if !(first.is_ascii_lowercase() || first == '_') => true,
        // Otherwise (including the empty string) inspect the remainder.
        _ => !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
    }
}
276
#[cfg(feature = "sql")]
/// Parses `s` as a dot-separated, possibly double-quoted multipart SQL
/// identifier (e.g. `a.b."C"`) using sqlparser's `GenericDialect`.
/// Returns one `Ident` per part; quote style is preserved on each part.
pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
    let dialect = GenericDialect;
    let mut parser = Parser::new(&dialect).try_with_sql(s)?;
    let idents = parser.parse_multipart_identifier()?;
    Ok(idents)
}
284
#[cfg(feature = "sql")]
/// Parses `s` into identifier parts and normalizes each one: quoted parts
/// keep their exact case, unquoted parts are lowercased unless
/// `ignore_case` is set. Unparsable input yields an empty vector.
pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
    let idents = parse_identifiers(s).unwrap_or_default();
    idents
        .into_iter()
        .map(|ident| {
            // Quoting or an explicit ignore-case request preserves case.
            let keep_case = ident.quote_style.is_some() || ignore_case;
            if keep_case {
                ident.value
            } else {
                ident.value.to_ascii_lowercase()
            }
        })
        .collect()
}
300
301#[cfg(not(feature = "sql"))]
302pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<String>> {
303 let mut result = Vec::new();
304 let mut current = String::new();
305 let mut in_quotes = false;
306
307 for ch in s.chars() {
308 match ch {
309 '"' => {
310 in_quotes = !in_quotes;
311 current.push(ch);
312 }
313 '.' if !in_quotes => {
314 result.push(current.clone());
315 current.clear();
316 }
317 _ => {
318 current.push(ch);
319 }
320 }
321 }
322
323 if !current.is_empty() {
325 result.push(current);
326 }
327
328 Ok(result)
329}
330
331#[cfg(not(feature = "sql"))]
332pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
333 parse_identifiers(s)
334 .unwrap_or_default()
335 .into_iter()
336 .map(|id| {
337 let is_double_quoted = if id.len() > 2 {
338 let mut chars = id.chars();
339 chars.next() == Some('"') && chars.last() == Some('"')
340 } else {
341 false
342 };
343 if is_double_quoted {
344 id[1..id.len() - 1].to_string().replace("\"\"", "\"")
345 } else if ignore_case {
346 id
347 } else {
348 id.to_ascii_lowercase()
349 }
350 })
351 .collect::<Vec<_>>()
352}
353
354pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
356 items: &[T],
357 indices: impl IntoIterator<Item = I>,
358) -> Result<Vec<T>> {
359 indices
360 .into_iter()
361 .map(|idx| items.get(*idx.borrow()).cloned())
362 .collect::<Option<Vec<T>>>()
363 .ok_or_else(|| {
364 _exec_datafusion_err!("Expects indices to be in the range of searched vector")
365 })
366}
367
/// Returns the length of the longest prefix of `sequence` equal to
/// `0, 1, 2, …` — i.e. how many leading items match their own position.
pub fn longest_consecutive_prefix<T: Borrow<usize>>(
    sequence: impl IntoIterator<Item = T>,
) -> usize {
    let mut expected = 0usize;
    for value in sequence {
        if *value.borrow() != expected {
            break;
        }
        expected += 1;
    }
    expected
}
385
386#[derive(Debug, Clone)]
408pub struct SingleRowListArrayBuilder {
409 arr: ArrayRef,
411 nullable: bool,
413 field_name: Option<String>,
416}
417
418impl SingleRowListArrayBuilder {
419 pub fn new(arr: ArrayRef) -> Self {
421 Self {
422 arr,
423 nullable: true,
424 field_name: None,
425 }
426 }
427
428 pub fn with_nullable(mut self, nullable: bool) -> Self {
430 self.nullable = nullable;
431 self
432 }
433
434 pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
436 self.field_name = field_name;
437 self
438 }
439
440 pub fn with_field(self, field: &Field) -> Self {
442 self.with_field_name(Some(field.name().to_owned()))
443 .with_nullable(field.is_nullable())
444 }
445
446 pub fn build_list_array(self) -> ListArray {
448 let (field, arr) = self.into_field_and_arr();
449 let offsets = OffsetBuffer::from_lengths([arr.len()]);
450 ListArray::new(field, offsets, arr, None)
451 }
452
453 pub fn build_list_scalar(self) -> ScalarValue {
455 ScalarValue::List(Arc::new(self.build_list_array()))
456 }
457
458 pub fn build_large_list_array(self) -> LargeListArray {
460 let (field, arr) = self.into_field_and_arr();
461 let offsets = OffsetBuffer::from_lengths([arr.len()]);
462 LargeListArray::new(field, offsets, arr, None)
463 }
464
465 pub fn build_large_list_scalar(self) -> ScalarValue {
467 ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
468 }
469
470 pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
472 let (field, arr) = self.into_field_and_arr();
473 FixedSizeListArray::new(field, list_size as i32, arr, None)
474 }
475
476 pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
478 ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
479 }
480
481 fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
483 let Self {
484 arr,
485 nullable,
486 field_name,
487 } = self;
488 let data_type = arr.data_type().to_owned();
489 let field = match field_name {
490 Some(name) => Field::new(name, data_type, nullable),
491 None => Field::new_list_field(data_type, nullable),
492 };
493 (Arc::new(field), arr)
494 }
495}
496
497pub fn arrays_into_list_array(
519 arr: impl IntoIterator<Item = ArrayRef>,
520) -> Result<ListArray> {
521 let arr = arr.into_iter().collect::<Vec<_>>();
522 if arr.is_empty() {
523 return _internal_err!("Cannot wrap empty array into list array");
524 }
525
526 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
527 let data_type = arr[0].data_type().to_owned();
529 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
530 Ok(ListArray::new(
531 Arc::new(Field::new_list_field(data_type, true)),
532 OffsetBuffer::from_lengths(lens),
533 arrow::compute::concat(values.as_slice())?,
534 None,
535 ))
536}
537
538pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
540 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
541}
542
543pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
545 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
546}
547
548pub fn base_type(data_type: &DataType) -> DataType {
564 match data_type {
565 DataType::List(field)
566 | DataType::LargeList(field)
567 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
568 _ => data_type.to_owned(),
569 }
570}
571
/// Controls how list-typed arguments are coerced (see
/// [`coerced_type_with_base_type_only`]).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ListCoercion {
    /// Coerce `FixedSizeList` inputs to variable-length `List`.
    FixedSizedListToList,
}
578
579pub fn coerced_type_with_base_type_only(
592 data_type: &DataType,
593 base_type: &DataType,
594 array_coercion: Option<&ListCoercion>,
595) -> DataType {
596 match (data_type, array_coercion) {
597 (DataType::List(field), _)
598 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
599 {
600 let field_type = coerced_type_with_base_type_only(
601 field.data_type(),
602 base_type,
603 array_coercion,
604 );
605
606 DataType::List(Arc::new(Field::new(
607 field.name(),
608 field_type,
609 field.is_nullable(),
610 )))
611 }
612 (DataType::FixedSizeList(field, len), _) => {
613 let field_type = coerced_type_with_base_type_only(
614 field.data_type(),
615 base_type,
616 array_coercion,
617 );
618
619 DataType::FixedSizeList(
620 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
621 *len,
622 )
623 }
624 (DataType::LargeList(field), _) => {
625 let field_type = coerced_type_with_base_type_only(
626 field.data_type(),
627 base_type,
628 array_coercion,
629 );
630
631 DataType::LargeList(Arc::new(Field::new(
632 field.name(),
633 field_type,
634 field.is_nullable(),
635 )))
636 }
637
638 _ => base_type.clone(),
639 }
640}
641
642pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
644 match data_type {
645 DataType::List(field) | DataType::FixedSizeList(field, _) => {
646 let field_type = coerced_fixed_size_list_to_list(field.data_type());
647
648 DataType::List(Arc::new(Field::new(
649 field.name(),
650 field_type,
651 field.is_nullable(),
652 )))
653 }
654 DataType::LargeList(field) => {
655 let field_type = coerced_fixed_size_list_to_list(field.data_type());
656
657 DataType::LargeList(Arc::new(Field::new(
658 field.name(),
659 field_type,
660 field.is_nullable(),
661 )))
662 }
663
664 _ => data_type.clone(),
665 }
666}
667
668pub fn list_ndims(data_type: &DataType) -> u64 {
670 match data_type {
671 DataType::List(field)
672 | DataType::LargeList(field)
673 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
674 _ => 0,
675 }
676}
677
/// Minimal string-similarity helpers (Levenshtein distance), avoiding an
/// external `strsim` dependency.
pub mod datafusion_strsim {
    use std::cmp::min;
    use std::str::Chars;

    // Adapter so a `&str` can be iterated repeatedly by reference,
    // yielding `char` items each time.
    struct StringWrapper<'a>(&'a str);

    impl<'b> IntoIterator for &StringWrapper<'b> {
        type Item = char;
        type IntoIter = Chars<'b>;

        fn into_iter(self) -> Self::IntoIter {
            self.0.chars()
        }
    }

    // Levenshtein distance over any re-iterable sequences using a single
    // rolling row of the classic DP matrix (O(|b|) memory).
    fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
        a: &'a Iter1,
        b: &'b Iter2,
    ) -> usize
    where
        &'a Iter1: IntoIterator<Item = Elem1>,
        &'b Iter2: IntoIterator<Item = Elem2>,
        Elem1: PartialEq<Elem2>,
    {
        let b_len = b.into_iter().count();

        // An empty `a` needs exactly `b_len` insertions.
        if a.into_iter().next().is_none() {
            return b_len;
        }

        // row[j]: distance between the processed prefix of `a` and the
        // first j+1 elements of `b`.
        let mut row: Vec<usize> = (1..=b_len).collect();
        let mut dist = 0;

        for (i, a_elem) in a.into_iter().enumerate() {
            dist = i + 1;
            let mut diag = i; // "substitution corner" of the DP matrix

            for (j, b_elem) in b.into_iter().enumerate() {
                let substitution = diag + usize::from(a_elem != b_elem);
                diag = row[j];
                // min of: insertion, substitution, deletion.
                dist = min(dist + 1, min(substitution, diag + 1));
                row[j] = dist;
            }
        }

        dist
    }

    /// Levenshtein edit distance between two strings, counted over
    /// Unicode scalar values.
    pub fn levenshtein(a: &str, b: &str) -> usize {
        generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
    }

    /// Levenshtein distance normalized to `[0.0, 1.0]`, where `1.0`
    /// means the strings are identical (two empty strings give `1.0`).
    pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
        if a.is_empty() && b.is_empty() {
            return 1.0;
        }
        let longest = a.chars().count().max(b.chars().count());
        1.0 - (levenshtein(a, b) as f64) / (longest as f64)
    }
}
766
/// Merges two index collections, removing duplicates, and returns the
/// result sorted ascending.
pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
    first: impl IntoIterator<Item = T>,
    second: impl IntoIterator<Item = S>,
) -> Vec<usize> {
    let mut unique: HashSet<usize> = HashSet::new();
    unique.extend(first.into_iter().map(|e| *e.borrow()));
    unique.extend(second.into_iter().map(|e| *e.borrow()));
    let mut merged: Vec<usize> = unique.into_iter().collect();
    merged.sort_unstable();
    merged
}
783
/// Returns the elements of `first` that do not occur in `second`,
/// preserving their order in `first`.
pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
    first: impl IntoIterator<Item = T>,
    second: impl IntoIterator<Item = S>,
) -> Vec<usize> {
    let excluded: HashSet<usize> = second.into_iter().map(|e| *e.borrow()).collect();
    first
        .into_iter()
        .filter_map(|e| {
            let value = *e.borrow();
            (!excluded.contains(&value)).then_some(value)
        })
        .collect()
}
797
798pub fn find_indices<T: PartialEq, S: Borrow<T>>(
801 items: &[T],
802 targets: impl IntoIterator<Item = S>,
803) -> Result<Vec<usize>> {
804 targets
805 .into_iter()
806 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
807 .collect::<Option<_>>()
808 .ok_or_else(|| _exec_datafusion_err!("Target not found"))
809}
810
/// Transposes a row-major matrix into column-major form. The number of
/// output rows equals the length of the first input row; longer rows are
/// truncated and shorter rows contribute fewer entries.
pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
    let width = original.first().map_or(0, Vec::len);
    let mut columns: Vec<Vec<T>> = Vec::new();
    columns.resize_with(width, Vec::new);
    for row in original {
        // zip truncates at whichever side is shorter, matching the
        // original element-dropping behavior.
        for (value, column) in row.into_iter().zip(columns.iter_mut()) {
            column.push(value);
        }
    }
    columns
}
826
/// Merges a parent LIMIT (skip/fetch) applied on top of a child LIMIT into
/// one equivalent (skip, fetch) pair.
///
/// The combined skip is the (saturating) sum of both skips. The combined
/// fetch is the tighter of the parent's fetch and the child's fetch reduced
/// by the rows the parent skips; `None` means unbounded on that side.
pub fn combine_limit(
    parent_skip: usize,
    parent_fetch: Option<usize>,
    child_skip: usize,
    child_fetch: Option<usize>,
) -> (usize, Option<usize>) {
    let combined_skip = child_skip.saturating_add(parent_skip);

    // Rows the parent skips count against the child's budget.
    let adjusted_child_fetch =
        child_fetch.map(|fetch| fetch.saturating_sub(parent_skip));
    let combined_fetch = match (parent_fetch, adjusted_child_fetch) {
        (Some(parent), Some(child)) => Some(parent.min(child)),
        // At most one side is bounded; keep whichever bound exists.
        (parent, child) => parent.or(child),
    };

    (combined_skip, combined_fetch)
}
889
/// Returns the parallelism available to this process, falling back to `1`
/// when the query fails (unsupported platform, restricted environment, ...).
pub fn get_available_parallelism() -> usize {
    // `NonZero::<usize>::MIN` is the constant 1; using it avoids the
    // eagerly-built `NonZero::new(1).expect(..)` fallback of the previous
    // version.
    available_parallelism()
        .unwrap_or(NonZero::<usize>::MIN)
        .get()
}
899
900pub fn take_function_args<const N: usize, T>(
927 function_name: &str,
928 args: impl IntoIterator<Item = T>,
929) -> Result<[T; N]> {
930 let args = args.into_iter().collect::<Vec<_>>();
931 args.try_into().map_err(|v: Vec<T>| {
932 _exec_datafusion_err!(
933 "{} function requires {} {}, got {}",
934 function_name,
935 N,
936 if N == 1 { "argument" } else { "arguments" },
937 v.len()
938 )
939 })
940}
941
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ScalarValue::Null;
    use arrow::array::Float64Array;
    // These imports are only used by `test_quote_identifier`, which is gated
    // on the `sql` feature; gate them the same way so builds without that
    // feature neither fail to resolve `sqlparser` nor warn about unused
    // imports.
    #[cfg(feature = "sql")]
    use sqlparser::ast::Ident;
    #[cfg(feature = "sql")]
    use sqlparser::tokenizer::Span;

    // Binary and linear search must agree on sorted multi-column input.
    #[test]
    fn test_bisect_linear_left_and_right() -> Result<()> {
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
            Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
            Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
        ];
        let search_tuple: Vec<ScalarValue> = vec![
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(3.0)),
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(8.0)),
        ];
        let ords = [
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: true,
                nulls_first: true,
            },
        ];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        Ok(())
    }

    // Sanity checks for lexicographic ordering of vectors, including nulls.
    #[test]
    fn vector_ord() {
        assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
        assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
        assert!(
            vec![
                ScalarValue::Int32(Some(2)),
                Null,
                ScalarValue::Int32(Some(0)),
            ] < vec![
                ScalarValue::Int32(Some(2)),
                Null,
                ScalarValue::Int32(Some(1)),
            ]
        );
        assert!(
            vec![
                ScalarValue::Int32(Some(2)),
                ScalarValue::Int32(None),
                ScalarValue::Int32(Some(0)),
            ] < vec![
                ScalarValue::Int32(Some(2)),
                ScalarValue::Int32(None),
                ScalarValue::Int32(Some(1)),
            ]
        );
    }

    #[test]
    fn ord_same_type() {
        assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
    }

    // Insertion points with mixed ascending/descending sort options.
    #[test]
    fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
        let ords = [SortOptions {
            descending: true,
            nulls_first: true,
        }];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 0);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 0);

        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
        let ords = [SortOptions {
            descending: true,
            nulls_first: true,
        }];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);

        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
        let ords = [SortOptions {
            descending: false,
            nulls_first: true,
        }];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);

        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
        let ords = [SortOptions {
            descending: false,
            nulls_first: true,
        }];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);

        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
            Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
        ];
        let search_tuple: Vec<ScalarValue> = vec![
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(8.0)),
        ];
        let ords = [
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: true,
                nulls_first: true,
            },
        ];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);

        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        Ok(())
    }

    // Partition boundaries over two sorted columns.
    #[test]
    fn test_evaluate_partition_ranges() -> Result<()> {
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
            Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
        ];
        let n_row = arrays[0].len();
        let options: Vec<SortOptions> = vec![
            SortOptions {
                descending: false,
                nulls_first: false,
            },
            SortOptions {
                descending: true,
                nulls_first: false,
            },
        ];
        let sort_columns = arrays
            .into_iter()
            .zip(options)
            .map(|(values, options)| SortColumn {
                values,
                options: Some(options),
            })
            .collect::<Vec<_>>();
        let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
        assert_eq!(ranges.len(), 4);
        assert_eq!(ranges[0], Range { start: 0, end: 2 });
        assert_eq!(ranges[1], Range { start: 2, end: 3 });
        assert_eq!(ranges[2], Range { start: 3, end: 4 });
        assert_eq!(ranges[3], Range { start: 4, end: 6 });
        Ok(())
    }

    // Quoting round-trips through the SQL parser.
    #[cfg(feature = "sql")]
    #[test]
    fn test_quote_identifier() -> Result<()> {
        let cases = vec![
            ("foo", r#"foo"#),
            ("_foo", r#"_foo"#),
            ("foo_bar", r#"foo_bar"#),
            ("foo-bar", r#""foo-bar""#),
            ("foo.bar", r#""foo.bar""#),
            ("Foo", r#""Foo""#),
            ("Foo.Bar", r#""Foo.Bar""#),
            ("test1", r#"test1"#),
            ("1test", r#""1test""#),
        ];

        for (identifier, quoted_identifier) in cases {
            println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");

            assert_eq!(quote_identifier(identifier), quoted_identifier);

            let quote_style = if quoted_identifier.starts_with('"') {
                Some('"')
            } else {
                None
            };

            let expected_parsed = vec![Ident {
                value: identifier.to_string(),
                quote_style,
                span: Span::empty(),
            }];

            assert_eq!(
                parse_identifiers(quoted_identifier).unwrap(),
                expected_parsed
            );
        }

        Ok(())
    }

    #[test]
    fn test_get_at_indices() -> Result<()> {
        let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
        assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
        assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
        // Out-of-bounds index must error.
        assert!(get_at_indices(&in_vec, [7]).is_err());
        Ok(())
    }

    #[test]
    fn test_longest_consecutive_prefix() {
        assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
        assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
        assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
        assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
    }

    #[test]
    fn test_merge_and_order_indices() {
        assert_eq!(
            merge_and_order_indices([0, 3, 4], [1, 3, 5]),
            vec![0, 1, 3, 4, 5]
        );
        // Result is sorted regardless of input order.
        assert_eq!(
            merge_and_order_indices([3, 0, 4], [5, 1, 3]),
            vec![0, 1, 3, 4, 5]
        );
    }

    #[test]
    fn test_set_difference() {
        assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
        assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
        assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
        assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
        assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
    }

    #[test]
    fn test_find_indices() -> Result<()> {
        assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
        assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
        assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
        assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
        assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
        Ok(())
    }

    #[test]
    fn test_transpose() -> Result<()> {
        let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
        let transposed = transpose(in_data);
        let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
        assert_eq!(expected, transposed);
        Ok(())
    }
}