1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::assert_or_internal_err;
26use crate::error::{_exec_datafusion_err, _internal_datafusion_err};
27use crate::{Result, ScalarValue};
28use arrow::array::{
29 Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait,
30 cast::AsArray,
31};
32use arrow::buffer::OffsetBuffer;
33use arrow::compute::{SortColumn, SortOptions, partition};
34use arrow::datatypes::{DataType, Field, SchemaRef};
35#[cfg(feature = "sql")]
36use sqlparser::{ast::Ident, dialect::GenericDialect, parser::Parser};
37use std::borrow::{Borrow, Cow};
38use std::cmp::{Ordering, min};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45pub fn project_schema(
72 schema: &SchemaRef,
73 projection: Option<&impl AsRef<[usize]>>,
74) -> Result<SchemaRef> {
75 let schema = match projection {
76 Some(columns) => Arc::new(schema.project(columns.as_ref())?),
77 None => Arc::clone(schema),
78 };
79 Ok(schema)
80}
81
82pub fn extract_row_at_idx_to_buf(
84 columns: &[ArrayRef],
85 idx: usize,
86 buf: &mut Vec<ScalarValue>,
87) -> Result<()> {
88 buf.clear();
89
90 let iter = columns
91 .iter()
92 .map(|arr| ScalarValue::try_from_array(arr, idx));
93 for v in iter.into_iter() {
94 buf.push(v?);
95 }
96
97 Ok(())
98}
99pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
101 columns
102 .iter()
103 .map(|arr| ScalarValue::try_from_array(arr, idx))
104 .collect()
105}
106
107pub fn compare_rows(
109 x: &[ScalarValue],
110 y: &[ScalarValue],
111 sort_options: &[SortOptions],
112) -> Result<Ordering> {
113 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
114 for ((lhs, rhs), sort_options) in zip_it {
116 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
118 (true, false, false) | (false, true, true) => Ordering::Greater,
119 (true, false, true) | (false, true, false) => Ordering::Less,
120 (false, false, _) => {
121 if sort_options.descending {
122 rhs.try_cmp(lhs)?
123 } else {
124 lhs.try_cmp(rhs)?
125 }
126 }
127 (true, true, _) => continue,
128 };
129 if result != Ordering::Equal {
130 return Ok(result);
131 }
132 }
133 Ok(Ordering::Equal)
134}
135
136pub fn bisect<const SIDE: bool>(
141 item_columns: &[ArrayRef],
142 target: &[ScalarValue],
143 sort_options: &[SortOptions],
144) -> Result<usize> {
145 let low: usize = 0;
146 let high: usize = item_columns
147 .first()
148 .ok_or_else(|| _internal_datafusion_err!("Column array shouldn't be empty"))?
149 .len();
150 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
151 let cmp = compare_rows(current, target, sort_options)?;
152 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
153 };
154 find_bisect_point(item_columns, target, compare_fn, low, high)
155}
156
157pub fn find_bisect_point<F>(
164 item_columns: &[ArrayRef],
165 target: &[ScalarValue],
166 compare_fn: F,
167 mut low: usize,
168 mut high: usize,
169) -> Result<usize>
170where
171 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
172{
173 while low < high {
174 let mid = ((high - low) / 2) + low;
175 let val = get_row_at_idx(item_columns, mid)?;
176 if compare_fn(&val, target)? {
177 low = mid + 1;
178 } else {
179 high = mid;
180 }
181 }
182 Ok(low)
183}
184
185pub fn linear_search<const SIDE: bool>(
190 item_columns: &[ArrayRef],
191 target: &[ScalarValue],
192 sort_options: &[SortOptions],
193) -> Result<usize> {
194 let low: usize = 0;
195 let high: usize = item_columns
196 .first()
197 .ok_or_else(|| _internal_datafusion_err!("Column array shouldn't be empty"))?
198 .len();
199 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
200 let cmp = compare_rows(current, target, sort_options)?;
201 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
202 };
203 search_in_slice(item_columns, target, compare_fn, low, high)
204}
205
206pub fn search_in_slice<F>(
211 item_columns: &[ArrayRef],
212 target: &[ScalarValue],
213 compare_fn: F,
214 mut low: usize,
215 high: usize,
216) -> Result<usize>
217where
218 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
219{
220 while low < high {
221 let val = get_row_at_idx(item_columns, low)?;
222 if !compare_fn(&val, target)? {
223 break;
224 }
225 low += 1;
226 }
227 Ok(low)
228}
229
230pub fn evaluate_partition_ranges(
235 num_rows: usize,
236 partition_columns: &[SortColumn],
237) -> Result<Vec<Range<usize>>> {
238 Ok(if partition_columns.is_empty() {
239 vec![Range {
240 start: 0,
241 end: num_rows,
242 }]
243 } else {
244 let cols: Vec<_> = partition_columns
245 .iter()
246 .map(|x| Arc::clone(&x.values))
247 .collect();
248 partition(&cols)?.ranges()
249 })
250}
251
/// Wraps `s` in double quotes (doubling any embedded `"`) unless it is
/// already a valid unquoted identifier, in which case it is returned
/// borrowed without allocation.
pub fn quote_identifier(s: &str) -> Cow<'_, str> {
    if !needs_quotes(s) {
        return Cow::Borrowed(s);
    }
    let escaped = s.replace('"', "\"\"");
    Cow::Owned(format!("\"{escaped}\""))
}

/// Returns `true` when `s` must be quoted to round-trip as an identifier:
/// a quote-free identifier must start with an ASCII lowercase letter or `_`
/// and contain only ASCII lowercase letters, digits, and underscores.
/// (The empty string needs no quoting.)
fn needs_quotes(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if !(first.is_ascii_lowercase() || first == '_') => true,
        // Remaining characters must all be lowercase, digit, or underscore.
        _ => !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
    }
}
277
/// Parses `s` as a (possibly dot-separated, multi-part) SQL identifier
/// using sqlparser's generic dialect.
///
/// # Errors
/// Returns an error if `s` cannot be tokenized or parsed as an identifier.
#[cfg(feature = "sql")]
pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
    let dialect = GenericDialect;
    let mut parser = Parser::new(&dialect).try_with_sql(s)?;
    Ok(parser.parse_multipart_identifier()?)
}
285
/// Parses `s` into identifier parts and normalizes them: quoted parts and
/// (when `ignore_case` is set) unquoted parts are kept verbatim; otherwise
/// unquoted parts are lowercased. Parse failures yield an empty vector.
#[cfg(feature = "sql")]
pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
    let idents = parse_identifiers(s).unwrap_or_default();
    idents
        .into_iter()
        .map(|ident| {
            if ident.quote_style.is_some() || ignore_case {
                ident.value
            } else {
                ident.value.to_ascii_lowercase()
            }
        })
        .collect()
}
301
302#[cfg(not(feature = "sql"))]
303pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<String>> {
304 let mut result = Vec::new();
305 let mut current = String::new();
306 let mut in_quotes = false;
307
308 for ch in s.chars() {
309 match ch {
310 '"' => {
311 in_quotes = !in_quotes;
312 current.push(ch);
313 }
314 '.' if !in_quotes => {
315 result.push(current.clone());
316 current.clear();
317 }
318 _ => {
319 current.push(ch);
320 }
321 }
322 }
323
324 if !current.is_empty() {
326 result.push(current);
327 }
328
329 Ok(result)
330}
331
332#[cfg(not(feature = "sql"))]
333pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
334 parse_identifiers(s)
335 .unwrap_or_default()
336 .into_iter()
337 .map(|id| {
338 let is_double_quoted = if id.len() > 2 {
339 let mut chars = id.chars();
340 chars.next() == Some('"') && chars.last() == Some('"')
341 } else {
342 false
343 };
344 if is_double_quoted {
345 id[1..id.len() - 1].to_string().replace("\"\"", "\"")
346 } else if ignore_case {
347 id
348 } else {
349 id.to_ascii_lowercase()
350 }
351 })
352 .collect::<Vec<_>>()
353}
354
355pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
357 items: &[T],
358 indices: impl IntoIterator<Item = I>,
359) -> Result<Vec<T>> {
360 indices
361 .into_iter()
362 .map(|idx| items.get(*idx.borrow()).cloned())
363 .collect::<Option<Vec<T>>>()
364 .ok_or_else(|| {
365 _exec_datafusion_err!("Expects indices to be in the range of searched vector")
366 })
367}
368
/// Returns the length of the longest prefix of `sequence` equal to
/// `0, 1, 2, …` — i.e. how many leading elements match their own position.
pub fn longest_consecutive_prefix<T: Borrow<usize>>(
    sequence: impl IntoIterator<Item = T>,
) -> usize {
    sequence
        .into_iter()
        .enumerate()
        .take_while(|(position, value)| value.borrow() == position)
        .count()
}
386
/// Builder producing list-typed arrays (`ListArray`, `LargeListArray`,
/// `FixedSizeListArray`) that contain exactly one row whose values are the
/// wrapped array.
#[derive(Debug, Clone)]
pub struct SingleRowListArrayBuilder {
    // The values making up the single list entry.
    arr: ArrayRef,
    // Whether the produced list field is declared nullable (defaults to true).
    nullable: bool,
    // Optional name for the produced field; `None` uses the default
    // list-field name (see `Field::new_list_field`).
    field_name: Option<String>,
}
418
impl SingleRowListArrayBuilder {
    /// Creates a builder wrapping `arr`; defaults to a nullable, unnamed field.
    pub fn new(arr: ArrayRef) -> Self {
        Self {
            arr,
            nullable: true,
            field_name: None,
        }
    }

    /// Sets whether the produced list field is declared nullable.
    pub fn with_nullable(mut self, nullable: bool) -> Self {
        self.nullable = nullable;
        self
    }

    /// Sets (or clears) the name used for the produced list field.
    pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
        self.field_name = field_name;
        self
    }

    /// Copies name and nullability from an existing `Field`.
    pub fn with_field(self, field: &Field) -> Self {
        self.with_field_name(Some(field.name().to_owned()))
            .with_nullable(field.is_nullable())
    }

    /// Builds a `ListArray` whose single row contains all wrapped values.
    pub fn build_list_array(self) -> ListArray {
        let (field, arr) = self.into_field_and_arr();
        // A single offset span covering every element => exactly one row.
        let offsets = OffsetBuffer::from_lengths([arr.len()]);
        ListArray::new(field, offsets, arr, None)
    }

    /// Builds a `ScalarValue::List` wrapping [`Self::build_list_array`].
    pub fn build_list_scalar(self) -> ScalarValue {
        ScalarValue::List(Arc::new(self.build_list_array()))
    }

    /// Builds a `LargeListArray` whose single row contains all wrapped values.
    pub fn build_large_list_array(self) -> LargeListArray {
        let (field, arr) = self.into_field_and_arr();
        let offsets = OffsetBuffer::from_lengths([arr.len()]);
        LargeListArray::new(field, offsets, arr, None)
    }

    /// Builds a `ScalarValue::LargeList` wrapping [`Self::build_large_list_array`].
    pub fn build_large_list_scalar(self) -> ScalarValue {
        ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
    }

    /// Builds a `FixedSizeListArray` with the given element count per row.
    /// NOTE(review): `list_size` is cast with `as i32`; values above i32::MAX
    /// would wrap — presumably callers pass small sizes. Verify at call sites.
    pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
        let (field, arr) = self.into_field_and_arr();
        FixedSizeListArray::new(field, list_size as i32, arr, None)
    }

    /// Builds a `ScalarValue::FixedSizeList` wrapping
    /// [`Self::build_fixed_size_list_array`].
    pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
        ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
    }

    // Consumes the builder, returning the element field (named if a name was
    // set, default list field otherwise) together with the wrapped array.
    fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
        let Self {
            arr,
            nullable,
            field_name,
        } = self;
        let data_type = arr.data_type().to_owned();
        let field = match field_name {
            Some(name) => Field::new(name, data_type, nullable),
            None => Field::new_list_field(data_type, nullable),
        };
        (Arc::new(field), arr)
    }
}
497
498pub fn arrays_into_list_array(
521 arr: impl IntoIterator<Item = ArrayRef>,
522) -> Result<ListArray> {
523 let arr = arr.into_iter().collect::<Vec<_>>();
524 assert_or_internal_err!(!arr.is_empty(), "Cannot wrap empty array into list array");
525
526 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
527 let data_type = arr[0].data_type().to_owned();
529 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
530 Ok(ListArray::new(
531 Arc::new(Field::new_list_field(data_type, true)),
532 OffsetBuffer::from_lengths(lens),
533 arrow::compute::concat(values.as_slice())?,
534 None,
535 ))
536}
537
538pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
540 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
541}
542
543pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
545 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
546}
547
548pub fn base_type(data_type: &DataType) -> DataType {
564 match data_type {
565 DataType::List(field)
566 | DataType::LargeList(field)
567 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
568 _ => data_type.to_owned(),
569 }
570}
571
/// Describes how list-typed values should be coerced during type resolution.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ListCoercion {
    /// Coerce `FixedSizeList` inputs to variable-size `List`.
    FixedSizedListToList,
}
578
579pub fn coerced_type_with_base_type_only(
593 data_type: &DataType,
594 base_type: &DataType,
595 array_coercion: Option<&ListCoercion>,
596) -> DataType {
597 match (data_type, array_coercion) {
598 (DataType::List(field), _)
599 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
600 {
601 let field_type = coerced_type_with_base_type_only(
602 field.data_type(),
603 base_type,
604 array_coercion,
605 );
606
607 DataType::List(Arc::new(Field::new(
608 field.name(),
609 field_type,
610 field.is_nullable(),
611 )))
612 }
613 (DataType::FixedSizeList(field, len), _) => {
614 let field_type = coerced_type_with_base_type_only(
615 field.data_type(),
616 base_type,
617 array_coercion,
618 );
619
620 DataType::FixedSizeList(
621 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
622 *len,
623 )
624 }
625 (DataType::LargeList(field), _) => {
626 let field_type = coerced_type_with_base_type_only(
627 field.data_type(),
628 base_type,
629 array_coercion,
630 );
631
632 DataType::LargeList(Arc::new(Field::new(
633 field.name(),
634 field_type,
635 field.is_nullable(),
636 )))
637 }
638
639 _ => base_type.clone(),
640 }
641}
642
643pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
645 match data_type {
646 DataType::List(field) | DataType::FixedSizeList(field, _) => {
647 let field_type = coerced_fixed_size_list_to_list(field.data_type());
648
649 DataType::List(Arc::new(Field::new(
650 field.name(),
651 field_type,
652 field.is_nullable(),
653 )))
654 }
655 DataType::LargeList(field) => {
656 let field_type = coerced_fixed_size_list_to_list(field.data_type());
657
658 DataType::LargeList(Arc::new(Field::new(
659 field.name(),
660 field_type,
661 field.is_nullable(),
662 )))
663 }
664
665 _ => data_type.clone(),
666 }
667}
668
669pub fn list_ndims(data_type: &DataType) -> u64 {
671 match data_type {
672 DataType::List(field)
673 | DataType::LargeList(field)
674 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
675 _ => 0,
676 }
677}
678
/// String-similarity helpers (Levenshtein edit distance).
pub mod datafusion_strsim {
    use std::cmp::min;
    use std::str::Chars;

    /// Adapter allowing a `&str` to be iterated by `char` repeatedly
    /// through a shared reference.
    struct StringWrapper<'a>(&'a str);

    impl<'b> IntoIterator for &StringWrapper<'b> {
        type Item = char;
        type IntoIter = Chars<'b>;

        fn into_iter(self) -> Self::IntoIter {
            self.0.chars()
        }
    }

    /// Levenshtein distance between two re-iterable sequences using `cache`
    /// as the single-row dynamic-programming buffer. The buffer is cleared
    /// and refilled here, so callers can reuse its allocation across calls.
    fn generic_levenshtein_with_buffer<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
        a: &'a Iter1,
        b: &'b Iter2,
        cache: &mut Vec<usize>,
    ) -> usize
    where
        &'a Iter1: IntoIterator<Item = Elem1>,
        &'b Iter2: IntoIterator<Item = Elem2>,
        Elem1: PartialEq<Elem2>,
    {
        let b_len = b.into_iter().count();

        // An empty `a` needs exactly `b_len` insertions.
        if a.into_iter().next().is_none() {
            return b_len;
        }

        // Seed the DP row: transforming "" into b[..=j] costs j + 1.
        cache.clear();
        cache.extend(1..=b_len);

        let mut distance = 0;

        for (i, a_elem) in a.into_iter().enumerate() {
            // Distance from a[..=i] to the empty prefix of `b`.
            distance = i + 1;
            // Diagonal of the DP table: distance from a[..i] to b[..j].
            let mut diagonal = i;

            for (j, b_elem) in b.into_iter().enumerate() {
                let substitution = diagonal + usize::from(a_elem != b_elem);
                diagonal = cache[j];
                // Minimum of deletion, substitution, and insertion.
                distance = min(distance + 1, min(substitution, diagonal + 1));
                cache[j] = distance;
            }
        }

        distance
    }

    /// Levenshtein distance with a locally-allocated DP buffer.
    fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
        a: &'a Iter1,
        b: &'b Iter2,
    ) -> usize
    where
        &'a Iter1: IntoIterator<Item = Elem1>,
        &'b Iter2: IntoIterator<Item = Elem2>,
        Elem1: PartialEq<Elem2>,
    {
        generic_levenshtein_with_buffer(a, b, &mut Vec::new())
    }

    /// Returns the Levenshtein (edit) distance between `a` and `b`,
    /// counted in `char`s.
    pub fn levenshtein(a: &str, b: &str) -> usize {
        generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
    }

    /// Like [`levenshtein`], but reuses `cache` as the DP scratch buffer to
    /// avoid one allocation per call.
    pub fn levenshtein_with_buffer(a: &str, b: &str, cache: &mut Vec<usize>) -> usize {
        generic_levenshtein_with_buffer(&StringWrapper(a), &StringWrapper(b), cache)
    }

    /// Normalized Levenshtein similarity in `[0.0, 1.0]`; `1.0` means the
    /// strings are identical (two empty strings score `1.0`).
    pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
        if a.is_empty() && b.is_empty() {
            return 1.0;
        }
        let longest = a.chars().count().max(b.chars().count()) as f64;
        1.0 - levenshtein(a, b) as f64 / longest
    }
}
797
/// Merges two index collections into a sorted, de-duplicated vector.
pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
    first: impl IntoIterator<Item = T>,
    second: impl IntoIterator<Item = S>,
) -> Vec<usize> {
    // A BTreeSet both de-duplicates and yields its contents in ascending
    // order, so no explicit sort is needed.
    let merged: std::collections::BTreeSet<usize> = first
        .into_iter()
        .map(|e| *e.borrow())
        .chain(second.into_iter().map(|e| *e.borrow()))
        .collect();
    merged.into_iter().collect()
}
814
/// Returns the elements of `first` that do not occur in `second`,
/// preserving the order of `first`.
pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
    first: impl IntoIterator<Item = T>,
    second: impl IntoIterator<Item = S>,
) -> Vec<usize> {
    let excluded: HashSet<usize> = second.into_iter().map(|e| *e.borrow()).collect();
    let mut remaining = Vec::new();
    for item in first {
        let value = *item.borrow();
        if !excluded.contains(&value) {
            remaining.push(value);
        }
    }
    remaining
}
828
829pub fn find_indices<T: PartialEq, S: Borrow<T>>(
832 items: &[T],
833 targets: impl IntoIterator<Item = S>,
834) -> Result<Vec<usize>> {
835 targets
836 .into_iter()
837 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
838 .collect::<Option<_>>()
839 .ok_or_else(|| _exec_datafusion_err!("Target not found"))
840}
841
/// Transposes a row-major matrix (vector of rows) into a vector of columns.
/// The number of output rows equals the length of the first input row.
pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
    let Some(first) = original.first() else {
        return vec![];
    };
    let mut columns: Vec<Vec<T>> = Vec::new();
    columns.resize_with(first.len(), Vec::new);
    for row in original {
        // Pair each row element with its destination column; extra elements
        // beyond the first row's width are dropped by `zip`.
        for (value, column) in row.into_iter().zip(columns.iter_mut()) {
            column.push(value);
        }
    }
    columns
}
857
/// Combines a parent limit (skip/fetch) applied on top of a child limit
/// into a single equivalent (skip, fetch) pair.
///
/// Skips add up; the child's fetch is first reduced by the parent's skip,
/// and the parent's fetch then caps whatever remains. All arithmetic
/// saturates so large values cannot overflow.
pub fn combine_limit(
    parent_skip: usize,
    parent_fetch: Option<usize>,
    child_skip: usize,
    child_fetch: Option<usize>,
) -> (usize, Option<usize>) {
    let skip = child_skip.saturating_add(parent_skip);

    // Rows still available from the child after the parent skips.
    let child_remaining = child_fetch.map(|fetch| fetch.saturating_sub(parent_skip));
    let fetch = match (parent_fetch, child_remaining) {
        (Some(parent), Some(child)) => Some(min(parent, child)),
        (Some(parent), None) => Some(parent),
        (None, remaining) => remaining,
    };

    (skip, fetch)
}
920
/// Returns the number of logical CPUs available to this process, falling
/// back to 1 when the platform cannot report it.
pub fn get_available_parallelism() -> usize {
    available_parallelism().map(NonZero::get).unwrap_or(1)
}
930
931pub fn take_function_args<const N: usize, T>(
958 function_name: &str,
959 args: impl IntoIterator<Item = T>,
960) -> Result<[T; N]> {
961 let args = args.into_iter().collect::<Vec<_>>();
962 args.try_into().map_err(|v: Vec<T>| {
963 _exec_datafusion_err!(
964 "{} function requires {} {}, got {}",
965 function_name,
966 N,
967 if N == 1 { "argument" } else { "arguments" },
968 v.len()
969 )
970 })
971}
972
973#[cfg(test)]
974mod tests {
975 use super::*;
976 use crate::ScalarValue::Null;
977 use arrow::array::Float64Array;
978
    // Left/right bisect and linear search must agree on multi-column rows
    // with mixed ascending/descending sort options.
    #[test]
    fn test_bisect_linear_left_and_right() -> Result<()> {
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
            Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
            Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
        ];
        // Target equals row 2 exactly, so left = 2 and right = 3.
        let search_tuple: Vec<ScalarValue> = vec![
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(3.0)),
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(8.0)),
        ];
        // Last column is descending; the rest ascending.
        let ords = [
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: true,
                nulls_first: true,
            },
        ];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        Ok(())
    }
1021
    // Sanity-checks that Vec's lexicographic ordering (including over
    // ScalarValue with nulls) behaves as the search helpers assume.
    #[test]
    fn vector_ord() {
        assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
        assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
        assert!(
            vec![
                ScalarValue::Int32(Some(2)),
                Null,
                ScalarValue::Int32(Some(0)),
            ] < vec![
                ScalarValue::Int32(Some(2)),
                Null,
                ScalarValue::Int32(Some(1)),
            ]
        );
        assert!(
            vec![
                ScalarValue::Int32(Some(2)),
                ScalarValue::Int32(None),
                ScalarValue::Int32(Some(0)),
            ] < vec![
                ScalarValue::Int32(Some(2)),
                ScalarValue::Int32(None),
                ScalarValue::Int32(Some(1)),
            ]
        );
    }
1049
    // ScalarValue comparison between same-typed values follows numeric order.
    #[test]
    fn ord_same_type() {
        assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
    }
1054
    // Exercises bisect/linear_search with differing sort directions,
    // single- and multi-column inputs, checking both sides of ties.
    #[test]
    fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
        // Descending column, target at the very front: left insert point is 0.
        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
        let ords = [SortOptions {
            descending: true,
            nulls_first: true,
        }];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 0);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 0);

        // Same data, right side: insert point is just past the match.
        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
        let ords = [SortOptions {
            descending: true,
            nulls_first: true,
        }];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);

        // Ascending column, left side of a unique match.
        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
        let ords = [SortOptions {
            descending: false,
            nulls_first: true,
        }];
        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 1);

        // Ascending column, right side of the same match.
        let arrays: Vec<ArrayRef> =
            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
        let ords = [SortOptions {
            descending: false,
            nulls_first: true,
        }];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);

        // Two columns with opposite directions; the second column breaks the
        // tie between the duplicate 8.0 values in the first.
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
            Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
        ];
        let search_tuple: Vec<ScalarValue> = vec![
            ScalarValue::Float64(Some(8.0)),
            ScalarValue::Float64(Some(8.0)),
        ];
        let ords = [
            SortOptions {
                descending: false,
                nulls_first: true,
            },
            SortOptions {
                descending: true,
                nulls_first: true,
            },
        ];
        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);
        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 3);

        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
        assert_eq!(res, 2);
        Ok(())
    }
1138
    // Partition ranges must split on every change across the combined
    // (column1 asc, column2 desc) key: 4 groups over 6 rows here.
    #[test]
    fn test_evaluate_partition_ranges() -> Result<()> {
        let arrays: Vec<ArrayRef> = vec![
            Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
            Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
        ];
        let n_row = arrays[0].len();
        let options: Vec<SortOptions> = vec![
            SortOptions {
                descending: false,
                nulls_first: false,
            },
            SortOptions {
                descending: true,
                nulls_first: false,
            },
        ];
        let sort_columns = arrays
            .into_iter()
            .zip(options)
            .map(|(values, options)| SortColumn {
                values,
                options: Some(options),
            })
            .collect::<Vec<_>>();
        let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
        assert_eq!(ranges.len(), 4);
        assert_eq!(ranges[0], Range { start: 0, end: 2 });
        assert_eq!(ranges[1], Range { start: 2, end: 3 });
        assert_eq!(ranges[2], Range { start: 3, end: 4 });
        assert_eq!(ranges[3], Range { start: 4, end: 6 });
        Ok(())
    }
1172
    // Round-trips quote_identifier with the sqlparser-based parse_identifiers:
    // a quoted output must parse back to the original value with quote_style set.
    #[cfg(feature = "sql")]
    #[test]
    fn test_quote_identifier() -> Result<()> {
        let cases = vec![
            ("foo", r#"foo"#),
            ("_foo", r#"_foo"#),
            ("foo_bar", r#"foo_bar"#),
            ("foo-bar", r#""foo-bar""#),
            ("foo.bar", r#""foo.bar""#),
            ("Foo", r#""Foo""#),
            ("Foo.Bar", r#""Foo.Bar""#),
            ("test1", r#"test1"#),
            ("1test", r#""1test""#),
        ];

        for (identifier, quoted_identifier) in cases {
            println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");

            assert_eq!(quote_identifier(identifier), quoted_identifier);

            // Expected quote style is inferred from whether quoting occurred.
            let quote_style = if quoted_identifier.starts_with('"') {
                Some('"')
            } else {
                None
            };

            let expected_parsed = vec![Ident {
                value: identifier.to_string(),
                quote_style,
                span: sqlparser::tokenizer::Span::empty(),
            }];

            assert_eq!(
                parse_identifiers(quoted_identifier).unwrap(),
                expected_parsed
            );
        }

        Ok(())
    }
1217
    // In-range indices select in given order; any out-of-range index errors.
    #[test]
    fn test_get_at_indices() -> Result<()> {
        let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
        assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
        assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
        assert!(get_at_indices(&in_vec, [7]).is_err());
        Ok(())
    }
1227
    // The prefix length counts leading elements equal to 0, 1, 2, ….
    #[test]
    fn test_longest_consecutive_prefix() {
        assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
        assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
        assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
        assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
    }
1235
    // Merged result is sorted and de-duplicated regardless of input order.
    #[test]
    fn test_merge_and_order_indices() {
        assert_eq!(
            merge_and_order_indices([0, 3, 4], [1, 3, 5]),
            vec![0, 1, 3, 4, 5]
        );
        assert_eq!(
            merge_and_order_indices([3, 0, 4], [5, 1, 3]),
            vec![0, 1, 3, 4, 5]
        );
    }
1248
    // Difference keeps first-collection order and drops any shared elements.
    #[test]
    fn test_set_difference() {
        assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
        assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
        assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
        assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
        assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
    }
1258
    // Positions are returned in target order; any missing target errors.
    #[test]
    fn test_find_indices() -> Result<()> {
        assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
        assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
        assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
        assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
        assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
        Ok(())
    }
1268
    // A 2x3 matrix transposes to 3x2.
    #[test]
    fn test_transpose() -> Result<()> {
        let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
        let transposed = transpose(in_data);
        let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
        assert_eq!(expected, transposed);
        Ok(())
    }
1277}