1pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err};
26use crate::{DataFusionError, Result, ScalarValue};
27use arrow::array::{
28 cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29 OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34use sqlparser::ast::Ident;
35use sqlparser::dialect::GenericDialect;
36use sqlparser::parser::Parser;
37use std::borrow::{Borrow, Cow};
38use std::cmp::{min, Ordering};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45pub fn project_schema(
75 schema: &SchemaRef,
76 projection: Option<&Vec<usize>>,
77) -> Result<SchemaRef> {
78 let schema = match projection {
79 Some(columns) => Arc::new(schema.project(columns)?),
80 None => Arc::clone(schema),
81 };
82 Ok(schema)
83}
84
85pub fn extract_row_at_idx_to_buf(
87 columns: &[ArrayRef],
88 idx: usize,
89 buf: &mut Vec<ScalarValue>,
90) -> Result<()> {
91 buf.clear();
92
93 let iter = columns
94 .iter()
95 .map(|arr| ScalarValue::try_from_array(arr, idx));
96 for v in iter.into_iter() {
97 buf.push(v?);
98 }
99
100 Ok(())
101}
102pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
104 columns
105 .iter()
106 .map(|arr| ScalarValue::try_from_array(arr, idx))
107 .collect()
108}
109
110pub fn compare_rows(
112 x: &[ScalarValue],
113 y: &[ScalarValue],
114 sort_options: &[SortOptions],
115) -> Result<Ordering> {
116 let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
117 for ((lhs, rhs), sort_options) in zip_it {
119 let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
121 (true, false, false) | (false, true, true) => Ordering::Greater,
122 (true, false, true) | (false, true, false) => Ordering::Less,
123 (false, false, _) => if sort_options.descending {
124 rhs.partial_cmp(lhs)
125 } else {
126 lhs.partial_cmp(rhs)
127 }
128 .ok_or_else(|| {
129 _internal_datafusion_err!("Column array shouldn't be empty")
130 })?,
131 (true, true, _) => continue,
132 };
133 if result != Ordering::Equal {
134 return Ok(result);
135 }
136 }
137 Ok(Ordering::Equal)
138}
139
140pub fn bisect<const SIDE: bool>(
145 item_columns: &[ArrayRef],
146 target: &[ScalarValue],
147 sort_options: &[SortOptions],
148) -> Result<usize> {
149 let low: usize = 0;
150 let high: usize = item_columns
151 .first()
152 .ok_or_else(|| {
153 DataFusionError::Internal("Column array shouldn't be empty".to_string())
154 })?
155 .len();
156 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
157 let cmp = compare_rows(current, target, sort_options)?;
158 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
159 };
160 find_bisect_point(item_columns, target, compare_fn, low, high)
161}
162
163pub fn find_bisect_point<F>(
170 item_columns: &[ArrayRef],
171 target: &[ScalarValue],
172 compare_fn: F,
173 mut low: usize,
174 mut high: usize,
175) -> Result<usize>
176where
177 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
178{
179 while low < high {
180 let mid = ((high - low) / 2) + low;
181 let val = get_row_at_idx(item_columns, mid)?;
182 if compare_fn(&val, target)? {
183 low = mid + 1;
184 } else {
185 high = mid;
186 }
187 }
188 Ok(low)
189}
190
191pub fn linear_search<const SIDE: bool>(
196 item_columns: &[ArrayRef],
197 target: &[ScalarValue],
198 sort_options: &[SortOptions],
199) -> Result<usize> {
200 let low: usize = 0;
201 let high: usize = item_columns
202 .first()
203 .ok_or_else(|| {
204 DataFusionError::Internal("Column array shouldn't be empty".to_string())
205 })?
206 .len();
207 let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
208 let cmp = compare_rows(current, target, sort_options)?;
209 Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
210 };
211 search_in_slice(item_columns, target, compare_fn, low, high)
212}
213
214pub fn search_in_slice<F>(
219 item_columns: &[ArrayRef],
220 target: &[ScalarValue],
221 compare_fn: F,
222 mut low: usize,
223 high: usize,
224) -> Result<usize>
225where
226 F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
227{
228 while low < high {
229 let val = get_row_at_idx(item_columns, low)?;
230 if !compare_fn(&val, target)? {
231 break;
232 }
233 low += 1;
234 }
235 Ok(low)
236}
237
238pub fn evaluate_partition_ranges(
243 num_rows: usize,
244 partition_columns: &[SortColumn],
245) -> Result<Vec<Range<usize>>> {
246 Ok(if partition_columns.is_empty() {
247 vec![Range {
248 start: 0,
249 end: num_rows,
250 }]
251 } else {
252 let cols: Vec<_> = partition_columns
253 .iter()
254 .map(|x| Arc::clone(&x.values))
255 .collect();
256 partition(&cols)?.ranges()
257 })
258}
259
260pub fn quote_identifier(s: &str) -> Cow<str> {
265 if needs_quotes(s) {
266 Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
267 } else {
268 Cow::Borrowed(s)
269 }
270}
271
272fn needs_quotes(s: &str) -> bool {
274 let mut chars = s.chars();
275
276 if let Some(first_char) = chars.next() {
278 if !(first_char.is_ascii_lowercase() || first_char == '_') {
279 return true;
280 }
281 }
282
283 !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
284}
285
286pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
287 let dialect = GenericDialect;
288 let mut parser = Parser::new(&dialect).try_with_sql(s)?;
289 let idents = parser.parse_multipart_identifier()?;
290 Ok(idents)
291}
292
293pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
294 parse_identifiers(s)
295 .unwrap_or_default()
296 .into_iter()
297 .map(|id| match id.quote_style {
298 Some(_) => id.value,
299 None if ignore_case => id.value,
300 _ => id.value.to_ascii_lowercase(),
301 })
302 .collect::<Vec<_>>()
303}
304
305pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
307 items: &[T],
308 indices: impl IntoIterator<Item = I>,
309) -> Result<Vec<T>> {
310 indices
311 .into_iter()
312 .map(|idx| items.get(*idx.borrow()).cloned())
313 .collect::<Option<Vec<T>>>()
314 .ok_or_else(|| {
315 DataFusionError::Execution(
316 "Expects indices to be in the range of searched vector".to_string(),
317 )
318 })
319}
320
321pub fn longest_consecutive_prefix<T: Borrow<usize>>(
327 sequence: impl IntoIterator<Item = T>,
328) -> usize {
329 let mut count = 0;
330 for item in sequence {
331 if !count.eq(item.borrow()) {
332 break;
333 }
334 count += 1;
335 }
336 count
337}
338
339#[derive(Debug, Clone)]
359pub struct SingleRowListArrayBuilder {
360 arr: ArrayRef,
362 nullable: bool,
364 field_name: Option<String>,
367}
368
369impl SingleRowListArrayBuilder {
370 pub fn new(arr: ArrayRef) -> Self {
372 Self {
373 arr,
374 nullable: true,
375 field_name: None,
376 }
377 }
378
379 pub fn with_nullable(mut self, nullable: bool) -> Self {
381 self.nullable = nullable;
382 self
383 }
384
385 pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
387 self.field_name = field_name;
388 self
389 }
390
391 pub fn with_field(self, field: &Field) -> Self {
393 self.with_field_name(Some(field.name().to_owned()))
394 .with_nullable(field.is_nullable())
395 }
396
397 pub fn build_list_array(self) -> ListArray {
399 let (field, arr) = self.into_field_and_arr();
400 let offsets = OffsetBuffer::from_lengths([arr.len()]);
401 ListArray::new(field, offsets, arr, None)
402 }
403
404 pub fn build_list_scalar(self) -> ScalarValue {
406 ScalarValue::List(Arc::new(self.build_list_array()))
407 }
408
409 pub fn build_large_list_array(self) -> LargeListArray {
411 let (field, arr) = self.into_field_and_arr();
412 let offsets = OffsetBuffer::from_lengths([arr.len()]);
413 LargeListArray::new(field, offsets, arr, None)
414 }
415
416 pub fn build_large_list_scalar(self) -> ScalarValue {
418 ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
419 }
420
421 pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
423 let (field, arr) = self.into_field_and_arr();
424 FixedSizeListArray::new(field, list_size as i32, arr, None)
425 }
426
427 pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
429 ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
430 }
431
432 fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
434 let Self {
435 arr,
436 nullable,
437 field_name,
438 } = self;
439 let data_type = arr.data_type().to_owned();
440 let field = match field_name {
441 Some(name) => Field::new(name, data_type, nullable),
442 None => Field::new_list_field(data_type, nullable),
443 };
444 (Arc::new(field), arr)
445 }
446}
447
448#[deprecated(
452 since = "44.0.0",
453 note = "please use `SingleRowListArrayBuilder` instead"
454)]
455pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray {
456 SingleRowListArrayBuilder::new(arr)
457 .with_nullable(true)
458 .build_list_array()
459}
460
461#[deprecated(
464 since = "44.0.0",
465 note = "please use `SingleRowListArrayBuilder` instead"
466)]
467pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
468 SingleRowListArrayBuilder::new(arr)
469 .with_nullable(nullable)
470 .build_list_array()
471}
472
473#[deprecated(
474 since = "44.0.0",
475 note = "please use `SingleRowListArrayBuilder` instead"
476)]
477pub fn array_into_list_array_with_field_name(
478 arr: ArrayRef,
479 nullable: bool,
480 field_name: &str,
481) -> ListArray {
482 SingleRowListArrayBuilder::new(arr)
483 .with_nullable(nullable)
484 .with_field_name(Some(field_name.to_string()))
485 .build_list_array()
486}
487
488#[deprecated(
491 since = "44.0.0",
492 note = "please use `SingleRowListArrayBuilder` instead"
493)]
494pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
495 SingleRowListArrayBuilder::new(arr).build_large_list_array()
496}
497
498#[deprecated(
499 since = "44.0.0",
500 note = "please use `SingleRowListArrayBuilder` instead"
501)]
502pub fn array_into_large_list_array_with_field_name(
503 arr: ArrayRef,
504 field_name: &str,
505) -> LargeListArray {
506 SingleRowListArrayBuilder::new(arr)
507 .with_field_name(Some(field_name.to_string()))
508 .build_large_list_array()
509}
510
511#[deprecated(
512 since = "44.0.0",
513 note = "please use `SingleRowListArrayBuilder` instead"
514)]
515pub fn array_into_fixed_size_list_array(
516 arr: ArrayRef,
517 list_size: usize,
518) -> FixedSizeListArray {
519 SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size)
520}
521
522#[deprecated(
523 since = "44.0.0",
524 note = "please use `SingleRowListArrayBuilder` instead"
525)]
526pub fn array_into_fixed_size_list_array_with_field_name(
527 arr: ArrayRef,
528 list_size: usize,
529 field_name: &str,
530) -> FixedSizeListArray {
531 SingleRowListArrayBuilder::new(arr)
532 .with_field_name(Some(field_name.to_string()))
533 .build_fixed_size_list_array(list_size)
534}
535
536pub fn arrays_into_list_array(
558 arr: impl IntoIterator<Item = ArrayRef>,
559) -> Result<ListArray> {
560 let arr = arr.into_iter().collect::<Vec<_>>();
561 if arr.is_empty() {
562 return _internal_err!("Cannot wrap empty array into list array");
563 }
564
565 let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
566 let data_type = arr[0].data_type().to_owned();
568 let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
569 Ok(ListArray::new(
570 Arc::new(Field::new_list_field(data_type, true)),
571 OffsetBuffer::from_lengths(lens),
572 arrow::compute::concat(values.as_slice())?,
573 None,
574 ))
575}
576
577pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
579 a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
580}
581
582pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
584 a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
585}
586
587pub fn base_type(data_type: &DataType) -> DataType {
602 match data_type {
603 DataType::List(field)
604 | DataType::LargeList(field)
605 | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
606 _ => data_type.to_owned(),
607 }
608}
609
610#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
612pub enum ListCoercion {
613 FixedSizedListToList,
615}
616
617pub fn coerced_type_with_base_type_only(
630 data_type: &DataType,
631 base_type: &DataType,
632 array_coercion: Option<&ListCoercion>,
633) -> DataType {
634 match (data_type, array_coercion) {
635 (DataType::List(field), _)
636 | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
637 {
638 let field_type = coerced_type_with_base_type_only(
639 field.data_type(),
640 base_type,
641 array_coercion,
642 );
643
644 DataType::List(Arc::new(Field::new(
645 field.name(),
646 field_type,
647 field.is_nullable(),
648 )))
649 }
650 (DataType::FixedSizeList(field, len), _) => {
651 let field_type = coerced_type_with_base_type_only(
652 field.data_type(),
653 base_type,
654 array_coercion,
655 );
656
657 DataType::FixedSizeList(
658 Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
659 *len,
660 )
661 }
662 (DataType::LargeList(field), _) => {
663 let field_type = coerced_type_with_base_type_only(
664 field.data_type(),
665 base_type,
666 array_coercion,
667 );
668
669 DataType::LargeList(Arc::new(Field::new(
670 field.name(),
671 field_type,
672 field.is_nullable(),
673 )))
674 }
675
676 _ => base_type.clone(),
677 }
678}
679
680pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
682 match data_type {
683 DataType::List(field) | DataType::FixedSizeList(field, _) => {
684 let field_type = coerced_fixed_size_list_to_list(field.data_type());
685
686 DataType::List(Arc::new(Field::new(
687 field.name(),
688 field_type,
689 field.is_nullable(),
690 )))
691 }
692 DataType::LargeList(field) => {
693 let field_type = coerced_fixed_size_list_to_list(field.data_type());
694
695 DataType::LargeList(Arc::new(Field::new(
696 field.name(),
697 field_type,
698 field.is_nullable(),
699 )))
700 }
701
702 _ => data_type.clone(),
703 }
704}
705
706pub fn list_ndims(data_type: &DataType) -> u64 {
708 match data_type {
709 DataType::List(field)
710 | DataType::LargeList(field)
711 | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
712 _ => 0,
713 }
714}
715
716pub mod datafusion_strsim {
718 use std::cmp::min;
721 use std::str::Chars;
722
723 struct StringWrapper<'a>(&'a str);
724
725 impl<'b> IntoIterator for &StringWrapper<'b> {
726 type Item = char;
727 type IntoIter = Chars<'b>;
728
729 fn into_iter(self) -> Self::IntoIter {
730 self.0.chars()
731 }
732 }
733
734 fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
737 a: &'a Iter1,
738 b: &'b Iter2,
739 ) -> usize
740 where
741 &'a Iter1: IntoIterator<Item = Elem1>,
742 &'b Iter2: IntoIterator<Item = Elem2>,
743 Elem1: PartialEq<Elem2>,
744 {
745 let b_len = b.into_iter().count();
746
747 if a.into_iter().next().is_none() {
748 return b_len;
749 }
750
751 let mut cache: Vec<usize> = (1..b_len + 1).collect();
752
753 let mut result = 0;
754
755 for (i, a_elem) in a.into_iter().enumerate() {
756 result = i + 1;
757 let mut distance_b = i;
758
759 for (j, b_elem) in b.into_iter().enumerate() {
760 let cost = if a_elem == b_elem { 0usize } else { 1usize };
761 let distance_a = distance_b + cost;
762 distance_b = cache[j];
763 result = min(result + 1, min(distance_a, distance_b + 1));
764 cache[j] = result;
765 }
766 }
767
768 result
769 }
770
771 pub fn levenshtein(a: &str, b: &str) -> usize {
780 generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
781 }
782
783 pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
797 if a.is_empty() && b.is_empty() {
798 return 1.0;
799 }
800 1.0 - (levenshtein(a, b) as f64)
801 / (a.chars().count().max(b.chars().count()) as f64)
802 }
803}
804
805pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
808 first: impl IntoIterator<Item = T>,
809 second: impl IntoIterator<Item = S>,
810) -> Vec<usize> {
811 let mut result: Vec<_> = first
812 .into_iter()
813 .map(|e| *e.borrow())
814 .chain(second.into_iter().map(|e| *e.borrow()))
815 .collect::<HashSet<_>>()
816 .into_iter()
817 .collect();
818 result.sort();
819 result
820}
821
822pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
825 first: impl IntoIterator<Item = T>,
826 second: impl IntoIterator<Item = S>,
827) -> Vec<usize> {
828 let set: HashSet<_> = second.into_iter().map(|e| *e.borrow()).collect();
829 first
830 .into_iter()
831 .map(|e| *e.borrow())
832 .filter(|e| !set.contains(e))
833 .collect()
834}
835
836#[deprecated(since = "45.0.0", note = "Use std::Iterator::is_sorted instead")]
838pub fn is_sorted<T: Borrow<usize>>(sequence: impl IntoIterator<Item = T>) -> bool {
839 let mut previous = 0;
841 for item in sequence.into_iter() {
842 let current = *item.borrow();
843 if current < previous {
844 return false;
845 }
846 previous = current;
847 }
848 true
849}
850
851pub fn find_indices<T: PartialEq, S: Borrow<T>>(
854 items: &[T],
855 targets: impl IntoIterator<Item = S>,
856) -> Result<Vec<usize>> {
857 targets
858 .into_iter()
859 .map(|target| items.iter().position(|e| target.borrow().eq(e)))
860 .collect::<Option<_>>()
861 .ok_or_else(|| DataFusionError::Execution("Target not found".to_string()))
862}
863
864pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
866 match original.as_slice() {
867 [] => vec![],
868 [first, ..] => {
869 let mut result = (0..first.len()).map(|_| vec![]).collect::<Vec<_>>();
870 for row in original {
871 for (item, transposed_row) in row.into_iter().zip(&mut result) {
872 transposed_row.push(item);
873 }
874 }
875 result
876 }
877 }
878}
879
880pub fn combine_limit(
924 parent_skip: usize,
925 parent_fetch: Option<usize>,
926 child_skip: usize,
927 child_fetch: Option<usize>,
928) -> (usize, Option<usize>) {
929 let combined_skip = child_skip.saturating_add(parent_skip);
930
931 let combined_fetch = match (parent_fetch, child_fetch) {
932 (Some(parent_fetch), Some(child_fetch)) => {
933 Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
934 }
935 (Some(parent_fetch), None) => Some(parent_fetch),
936 (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
937 (None, None) => None,
938 };
939
940 (combined_skip, combined_fetch)
941}
942
943pub fn get_available_parallelism() -> usize {
948 available_parallelism()
949 .unwrap_or(NonZero::new(1).expect("literal value `1` shouldn't be zero"))
950 .get()
951}
952
953pub fn take_function_args<const N: usize, T>(
977 function_name: &str,
978 args: impl IntoIterator<Item = T>,
979) -> Result<[T; N]> {
980 let args = args.into_iter().collect::<Vec<_>>();
981 args.try_into().map_err(|v: Vec<T>| {
982 _exec_datafusion_err!(
983 "{} function requires {} {}, got {}",
984 function_name,
985 N,
986 if N == 1 { "argument" } else { "arguments" },
987 v.len()
988 )
989 })
990}
991
992#[cfg(test)]
993mod tests {
994 use super::*;
995 use crate::ScalarValue::Null;
996 use arrow::array::Float64Array;
997 use sqlparser::tokenizer::Span;
998
999 #[test]
1000 fn test_bisect_linear_left_and_right() -> Result<()> {
1001 let arrays: Vec<ArrayRef> = vec![
1002 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
1003 Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
1004 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
1005 Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
1006 ];
1007 let search_tuple: Vec<ScalarValue> = vec![
1008 ScalarValue::Float64(Some(8.0)),
1009 ScalarValue::Float64(Some(3.0)),
1010 ScalarValue::Float64(Some(8.0)),
1011 ScalarValue::Float64(Some(8.0)),
1012 ];
1013 let ords = [
1014 SortOptions {
1015 descending: false,
1016 nulls_first: true,
1017 },
1018 SortOptions {
1019 descending: false,
1020 nulls_first: true,
1021 },
1022 SortOptions {
1023 descending: false,
1024 nulls_first: true,
1025 },
1026 SortOptions {
1027 descending: true,
1028 nulls_first: true,
1029 },
1030 ];
1031 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1032 assert_eq!(res, 2);
1033 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1034 assert_eq!(res, 3);
1035 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1036 assert_eq!(res, 2);
1037 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1038 assert_eq!(res, 3);
1039 Ok(())
1040 }
1041
1042 #[test]
1043 fn vector_ord() {
1044 assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
1045 assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
1046 assert!(
1047 vec![
1048 ScalarValue::Int32(Some(2)),
1049 Null,
1050 ScalarValue::Int32(Some(0)),
1051 ] < vec![
1052 ScalarValue::Int32(Some(2)),
1053 Null,
1054 ScalarValue::Int32(Some(1)),
1055 ]
1056 );
1057 assert!(
1058 vec![
1059 ScalarValue::Int32(Some(2)),
1060 ScalarValue::Int32(None),
1061 ScalarValue::Int32(Some(0)),
1062 ] < vec![
1063 ScalarValue::Int32(Some(2)),
1064 ScalarValue::Int32(None),
1065 ScalarValue::Int32(Some(1)),
1066 ]
1067 );
1068 }
1069
1070 #[test]
1071 fn ord_same_type() {
1072 assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
1073 }
1074
1075 #[test]
1076 fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
1077 let arrays: Vec<ArrayRef> =
1079 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1080 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1081 let ords = [SortOptions {
1082 descending: true,
1083 nulls_first: true,
1084 }];
1085 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1086 assert_eq!(res, 0);
1087 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1088 assert_eq!(res, 0);
1089
1090 let arrays: Vec<ArrayRef> =
1092 vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1093 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1094 let ords = [SortOptions {
1095 descending: true,
1096 nulls_first: true,
1097 }];
1098 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1099 assert_eq!(res, 1);
1100 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1101 assert_eq!(res, 1);
1102
1103 let arrays: Vec<ArrayRef> =
1105 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1106 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1107 let ords = [SortOptions {
1108 descending: false,
1109 nulls_first: true,
1110 }];
1111 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1112 assert_eq!(res, 1);
1113 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1114 assert_eq!(res, 1);
1115
1116 let arrays: Vec<ArrayRef> =
1118 vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1119 let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1120 let ords = [SortOptions {
1121 descending: false,
1122 nulls_first: true,
1123 }];
1124 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1125 assert_eq!(res, 2);
1126 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1127 assert_eq!(res, 2);
1128
1129 let arrays: Vec<ArrayRef> = vec![
1130 Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
1131 Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
1132 ];
1133 let search_tuple: Vec<ScalarValue> = vec![
1134 ScalarValue::Float64(Some(8.0)),
1135 ScalarValue::Float64(Some(8.0)),
1136 ];
1137 let ords = [
1138 SortOptions {
1139 descending: false,
1140 nulls_first: true,
1141 },
1142 SortOptions {
1143 descending: true,
1144 nulls_first: true,
1145 },
1146 ];
1147 let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1148 assert_eq!(res, 3);
1149 let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1150 assert_eq!(res, 3);
1151
1152 let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1153 assert_eq!(res, 2);
1154 let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1155 assert_eq!(res, 2);
1156 Ok(())
1157 }
1158
1159 #[test]
1160 fn test_evaluate_partition_ranges() -> Result<()> {
1161 let arrays: Vec<ArrayRef> = vec![
1162 Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
1163 Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
1164 ];
1165 let n_row = arrays[0].len();
1166 let options: Vec<SortOptions> = vec![
1167 SortOptions {
1168 descending: false,
1169 nulls_first: false,
1170 },
1171 SortOptions {
1172 descending: true,
1173 nulls_first: false,
1174 },
1175 ];
1176 let sort_columns = arrays
1177 .into_iter()
1178 .zip(options)
1179 .map(|(values, options)| SortColumn {
1180 values,
1181 options: Some(options),
1182 })
1183 .collect::<Vec<_>>();
1184 let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
1185 assert_eq!(ranges.len(), 4);
1186 assert_eq!(ranges[0], Range { start: 0, end: 2 });
1187 assert_eq!(ranges[1], Range { start: 2, end: 3 });
1188 assert_eq!(ranges[2], Range { start: 3, end: 4 });
1189 assert_eq!(ranges[3], Range { start: 4, end: 6 });
1190 Ok(())
1191 }
1192
1193 #[test]
1194 fn test_quote_identifier() -> Result<()> {
1195 let cases = vec![
1196 ("foo", r#"foo"#),
1197 ("_foo", r#"_foo"#),
1198 ("foo_bar", r#"foo_bar"#),
1199 ("foo-bar", r#""foo-bar""#),
1200 ("foo.bar", r#""foo.bar""#),
1202 ("Foo", r#""Foo""#),
1203 ("Foo.Bar", r#""Foo.Bar""#),
1204 ("test1", r#"test1"#),
1206 ("1test", r#""1test""#),
1207 ];
1208
1209 for (identifier, quoted_identifier) in cases {
1210 println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");
1211
1212 assert_eq!(quote_identifier(identifier), quoted_identifier);
1213
1214 let quote_style = if quoted_identifier.starts_with('"') {
1217 Some('"')
1218 } else {
1219 None
1220 };
1221
1222 let expected_parsed = vec![Ident {
1223 value: identifier.to_string(),
1224 quote_style,
1225 span: Span::empty(),
1226 }];
1227
1228 assert_eq!(
1229 parse_identifiers(quoted_identifier).unwrap(),
1230 expected_parsed
1231 );
1232 }
1233
1234 Ok(())
1235 }
1236
1237 #[test]
1238 fn test_get_at_indices() -> Result<()> {
1239 let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
1240 assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
1241 assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
1242 assert!(get_at_indices(&in_vec, [7]).is_err());
1244 Ok(())
1245 }
1246
1247 #[test]
1248 fn test_longest_consecutive_prefix() {
1249 assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
1250 assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
1251 assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
1252 assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
1253 }
1254
1255 #[test]
1256 fn test_merge_and_order_indices() {
1257 assert_eq!(
1258 merge_and_order_indices([0, 3, 4], [1, 3, 5]),
1259 vec![0, 1, 3, 4, 5]
1260 );
1261 assert_eq!(
1263 merge_and_order_indices([3, 0, 4], [5, 1, 3]),
1264 vec![0, 1, 3, 4, 5]
1265 );
1266 }
1267
1268 #[test]
1269 fn test_set_difference() {
1270 assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
1271 assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
1272 assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
1274 assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
1275 assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
1276 }
1277
1278 #[test]
1279 #[expect(deprecated)]
1280 fn test_is_sorted() {
1281 assert!(is_sorted::<usize>([]));
1282 assert!(is_sorted([0]));
1283 assert!(is_sorted([0, 3, 4]));
1284 assert!(is_sorted([0, 1, 2]));
1285 assert!(is_sorted([0, 1, 4]));
1286 assert!(is_sorted([0usize; 0]));
1287 assert!(is_sorted([1, 2]));
1288 assert!(!is_sorted([3, 2]));
1289 }
1290
1291 #[test]
1292 fn test_find_indices() -> Result<()> {
1293 assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
1294 assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
1295 assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
1296 assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
1297 assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
1298 Ok(())
1299 }
1300
1301 #[test]
1302 fn test_transpose() -> Result<()> {
1303 let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
1304 let transposed = transpose(in_data);
1305 let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
1306 assert_eq!(expected, transposed);
1307 Ok(())
1308 }
1309}