Skip to main content

cudf_polars/
lib.rs

1//! GPU execution engine for Polars using NVIDIA libcudf.
2
3pub mod convert;
4pub mod engine;
5pub mod error;
6pub mod expr;
7pub mod gpu_frame;
8pub mod types;
9
10#[cfg(feature = "lazy")]
11pub use engine::collect_gpu;
12pub use engine::execute_plan;
13pub use gpu_frame::GpuDataFrame;
14
/// Host -> GPU -> host round-trip tests for the conversion layer in `convert`.
///
/// Every test in this module requires the `gpu-tests` feature, so the module
/// itself is gated on it. Gating the module (instead of each `#[test]`) keeps the
/// imports below from becoming unused when the feature is disabled.
#[cfg(all(test, feature = "gpu-tests"))]
mod tests {
    use super::*;
    use polars_core::prelude::*;

    /// Converts `df` to a GPU table and straight back to a Polars `DataFrame`.
    ///
    /// Panics (failing the calling test) if either conversion returns an error.
    fn roundtrip(df: &DataFrame) -> DataFrame {
        let (gpu_table, names) = convert::dataframe_to_gpu(df).unwrap();
        convert::gpu_to_dataframe(gpu_table, &names).unwrap()
    }

    /// A non-null `i32` column survives the round trip element for element.
    #[test]
    fn roundtrip_i32() {
        let df = df!("x" => [1i32, 2, 3, 4, 5]).unwrap();
        let back = roundtrip(&df);
        let orig = df.column("x").unwrap().i32().unwrap();
        let result = back.column("x").unwrap().i32().unwrap();
        assert_eq!(orig.len(), result.len());
        for i in 0..orig.len() {
            assert_eq!(orig.get(i), result.get(i), "mismatch at index {}", i);
        }
    }

    /// A non-null `f64` column survives the round trip within `f64::EPSILON`.
    #[test]
    fn roundtrip_f64() {
        let df = df!("val" => [1.1f64, 2.2, 3.3]).unwrap();
        let back = roundtrip(&df);
        let orig = df.column("val").unwrap().f64().unwrap();
        let result = back.column("val").unwrap().f64().unwrap();
        assert_eq!(orig.len(), result.len());
        for i in 0..orig.len() {
            let o = orig.get(i).unwrap();
            let r = result.get(i).unwrap();
            assert!(
                (o - r).abs() < f64::EPSILON,
                "mismatch at index {}: {} vs {}",
                i,
                o,
                r
            );
        }
    }

    /// A string column survives the round trip element for element.
    #[test]
    fn roundtrip_string() {
        let df = df!("s" => ["hello", "world", "gpu"]).unwrap();
        let back = roundtrip(&df);
        let orig = df.column("s").unwrap().str().unwrap();
        let result = back.column("s").unwrap().str().unwrap();
        assert_eq!(orig.len(), result.len());
        for i in 0..orig.len() {
            assert_eq!(orig.get(i), result.get(i), "mismatch at index {}", i);
        }
    }

    /// A frame mixing `i64`, `f64` and string columns keeps its shape and values.
    #[test]
    fn roundtrip_multi_column() {
        let df = df!(
            "id" => [1i64, 2, 3],
            "value" => [10.0f64, 20.0, 30.0],
            "name" => ["a", "b", "c"]
        )
        .unwrap();
        let back = roundtrip(&df);
        assert_eq!(df.height(), back.height());
        assert_eq!(df.width(), back.width());

        let orig_id = df.column("id").unwrap().i64().unwrap();
        let result_id = back.column("id").unwrap().i64().unwrap();
        for i in 0..orig_id.len() {
            assert_eq!(
                orig_id.get(i),
                result_id.get(i),
                "id mismatch at index {}",
                i
            );
        }

        let orig_val = df.column("value").unwrap().f64().unwrap();
        let result_val = back.column("value").unwrap().f64().unwrap();
        for i in 0..orig_val.len() {
            let o = orig_val.get(i).unwrap();
            let r = result_val.get(i).unwrap();
            assert!(
                (o - r).abs() < f64::EPSILON,
                "value mismatch at index {}: {} vs {}",
                i,
                o,
                r
            );
        }

        let orig_name = df.column("name").unwrap().str().unwrap();
        let result_name = back.column("name").unwrap().str().unwrap();
        for i in 0..orig_name.len() {
            assert_eq!(
                orig_name.get(i),
                result_name.get(i),
                "name mismatch at index {}",
                i
            );
        }
    }

    /// A boolean column survives the round trip element for element.
    #[test]
    fn roundtrip_boolean() {
        let df = df!("flag" => [true, false, true]).unwrap();
        let back = roundtrip(&df);
        let orig = df.column("flag").unwrap().bool().unwrap();
        let result = back.column("flag").unwrap().bool().unwrap();
        assert_eq!(orig.len(), result.len());
        for i in 0..orig.len() {
            assert_eq!(orig.get(i), result.get(i), "mismatch at index {}", i);
        }
    }

    /// Nulls in an `i32` column come back as nulls at the same positions.
    #[test]
    fn roundtrip_nullable_i32() {
        let df = df!("x" => &[Some(1i32), None, Some(3), None, Some(5)]).unwrap();
        let back = roundtrip(&df);
        let orig = df.column("x").unwrap().i32().unwrap();
        let result = back.column("x").unwrap().i32().unwrap();
        assert_eq!(orig.len(), result.len());
        for i in 0..orig.len() {
            // `get` yields `Option<i32>`, so this compares validity as well as value.
            assert_eq!(orig.get(i), result.get(i), "mismatch at index {}", i);
        }
    }

    /// A zero-row frame keeps its single column through the round trip.
    #[test]
    fn roundtrip_empty() {
        let df = df!("x" => Vec::<i32>::new()).unwrap();
        let back = roundtrip(&df);
        assert_eq!(back.height(), 0);
        assert_eq!(back.width(), 1);
    }
}
156
/// Component tests for `GpuDataFrame` operations and GPU expression evaluation.
///
/// Every test in this module requires the `gpu-tests` feature, so the module
/// itself is gated on it. Gating the module (instead of each `#[test]`) keeps the
/// imports below from becoming unused when the feature is disabled.
#[cfg(all(test, feature = "gpu-tests"))]
mod engine_tests {
    use crate::error as gpu_error;
    use crate::expr as gpu_expr;
    use crate::gpu_frame::GpuDataFrame;
    use polars_core::prelude::*;

    /// Reads the `i32` column `name` of `df` into a `Vec`.
    ///
    /// Panics if the column is missing or is not `i32`; the column is read with
    /// `into_no_null_iter`, so callers must only use it on null-free columns.
    fn i32_values(df: &DataFrame, name: &str) -> Vec<i32> {
        df.column(name)
            .unwrap()
            .i32()
            .unwrap()
            .into_no_null_iter()
            .collect()
    }

    // ── GpuDataFrame component tests ──

    /// Selecting a subset of columns keeps row count and the selected values.
    #[test]
    fn gpu_frame_select_columns() {
        let df = df!(
            "a" => [1i32, 2, 3],
            "b" => [4i32, 5, 6],
            "c" => [7i32, 8, 9]
        )
        .unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();
        let selected = gpu_df.select_columns(&["a", "c"]).unwrap();
        assert_eq!(selected.width(), 2);
        assert_eq!(selected.height(), 3);
        let back = selected.to_polars().unwrap();
        assert_eq!(back.width(), 2);
        let a = back.column("a").unwrap().i32().unwrap();
        assert_eq!(a.get(0), Some(1));
        assert_eq!(a.get(2), Some(3));
        let c = back.column("c").unwrap().i32().unwrap();
        assert_eq!(c.get(0), Some(7));
    }

    /// Filtering with a boolean mask keeps exactly the rows where the mask is true.
    #[test]
    fn gpu_frame_boolean_mask() {
        let df = df!("x" => [1i32, 2, 3, 4, 5]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        // Create mask: x > 2 → [false, false, true, true, true]
        let x_col = gpu_df.column_by_name("x").unwrap();
        let threshold = cudf::Scalar::new(2i32).unwrap();
        let mask = gpu_error::gpu_result(x_col.binary_op_scalar(
            &threshold,
            cudf::BinaryOp::Greater,
            cudf::types::DataType::new(cudf::types::TypeId::Bool8),
        ))
        .unwrap();

        let filtered = gpu_df.apply_boolean_mask(&mask).unwrap();
        assert_eq!(filtered.height(), 3);
        let back = filtered.to_polars().unwrap();
        assert_eq!(i32_values(&back, "x"), vec![3, 4, 5]);
    }

    /// `slice(1, 3)` returns three rows starting at row index 1.
    #[test]
    fn gpu_frame_slice() {
        let df = df!("x" => [10i32, 20, 30, 40, 50]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();
        let sliced = gpu_df.slice(1, 3).unwrap();
        assert_eq!(sliced.height(), 3);
        let back = sliced.to_polars().unwrap();
        assert_eq!(i32_values(&back, "x"), vec![20, 30, 40]);
    }

    /// A negative slice offset counts from the end of the frame.
    #[test]
    fn gpu_frame_negative_offset_slice() {
        let df = df!("x" => [10i32, 20, 30, 40, 50]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();
        // Negative offset: last 2 rows
        let sliced = gpu_df.slice(-2, 2).unwrap();
        assert_eq!(sliced.height(), 2);
        let back = sliced.to_polars().unwrap();
        assert_eq!(i32_values(&back, "x"), vec![40, 50]);
    }

    // ── Expression evaluation tests ──

    /// `a + b` over two `i32` columns is evaluated element-wise.
    #[test]
    fn expr_binary_add() {
        use polars_plan::dsl::Operator;
        use polars_plan::plans::AExpr;
        use polars_utils::arena::Arena;

        let df = df!("a" => [1i32, 2, 3], "b" => [10i32, 20, 30]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let mut arena = Arena::new();
        let left = arena.add(AExpr::Column("a".into()));
        let right = arena.add(AExpr::Column("b".into()));
        let add = arena.add(AExpr::BinaryExpr {
            left,
            op: Operator::Plus,
            right,
        });

        let result = gpu_expr::eval_expr(add, &arena, &gpu_df).unwrap();
        let vals: Vec<i32> = gpu_error::gpu_result(result.to_vec()).unwrap();
        assert_eq!(vals, vec![11, 22, 33]);
    }

    /// `x > 3` against an integer literal yields a boolean column.
    #[test]
    fn expr_comparison() {
        use polars_plan::dsl::Operator;
        use polars_plan::plans::AExpr;
        use polars_plan::plans::{DynLiteralValue, LiteralValue};
        use polars_utils::arena::Arena;

        let df = df!("x" => [1i32, 2, 3, 4, 5]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let mut arena = Arena::new();
        let col_node = arena.add(AExpr::Column("x".into()));
        let lit_node = arena.add(AExpr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(3))));
        let cmp = arena.add(AExpr::BinaryExpr {
            left: col_node,
            op: Operator::Gt,
            right: lit_node,
        });

        let result = gpu_expr::eval_expr(cmp, &arena, &gpu_df).unwrap();
        let vals: Vec<bool> = gpu_error::gpu_result(result.to_vec()).unwrap();
        assert_eq!(vals, vec![false, false, false, true, true]);
    }

    /// A non-strict cast from `i32` to `Float64` converts every value.
    #[test]
    fn expr_cast() {
        use polars_core::chunked_array::cast::CastOptions;
        use polars_core::prelude::DataType;
        use polars_plan::plans::AExpr;
        use polars_utils::arena::Arena;

        let df = df!("x" => [1i32, 2, 3]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let mut arena = Arena::new();
        let col_node = arena.add(AExpr::Column("x".into()));
        let cast_node = arena.add(AExpr::Cast {
            expr: col_node,
            dtype: DataType::Float64,
            options: CastOptions::NonStrict,
        });

        let result = gpu_expr::eval_expr(cast_node, &arena, &gpu_df).unwrap();
        let vals: Vec<f64> = gpu_error::gpu_result(result.to_vec()).unwrap();
        assert_eq!(vals, vec![1.0, 2.0, 3.0]);
    }

    /// `AExpr::Len` evaluates to the row count repeated once per row.
    #[test]
    fn expr_len() {
        use polars_plan::plans::AExpr;
        use polars_utils::arena::Arena;

        let df = df!("x" => [1i32, 2, 3, 4, 5]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let mut arena = Arena::new();
        let len_node = arena.add(AExpr::Len);

        let result = gpu_expr::eval_expr(len_node, &arena, &gpu_df).unwrap();
        let vals: Vec<u32> = gpu_error::gpu_result(result.to_vec()).unwrap();
        assert_eq!(vals, vec![5u32; 5]);
    }

    // ── M3: Sort tests ──

    /// Sorting by a single key ascending orders the rows smallest-first.
    #[test]
    fn gpu_frame_sort_ascending() {
        use cudf::sorting::{NullOrder, SortOrder};

        let df = df!("x" => [3i32, 1, 4, 1, 5]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let key = gpu_df.column_by_name("x").unwrap();
        let sorted = gpu_df
            .sort_by_key(vec![key], &[SortOrder::Ascending], &[NullOrder::After])
            .unwrap();

        let back = sorted.to_polars().unwrap();
        assert_eq!(i32_values(&back, "x"), vec![1, 1, 3, 4, 5]);
    }

    /// Sorting by a single key descending orders the rows largest-first.
    #[test]
    fn gpu_frame_sort_descending() {
        use cudf::sorting::{NullOrder, SortOrder};

        let df = df!("x" => [3i32, 1, 4, 1, 5]).unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let key = gpu_df.column_by_name("x").unwrap();
        let sorted = gpu_df
            .sort_by_key(vec![key], &[SortOrder::Descending], &[NullOrder::After])
            .unwrap();

        let back = sorted.to_polars().unwrap();
        assert_eq!(i32_values(&back, "x"), vec![5, 4, 3, 1, 1]);
    }

    // ── M3: GroupBy tests ──

    /// Group-by with a `Sum` aggregation produces one row per key with the summed value.
    #[test]
    fn gpu_frame_groupby_sum() {
        use cudf::aggregation::AggregationKind;

        let df = df!(
            "cat" => [1i32, 1, 2, 2, 3],
            "val" => [10.0f64, 20.0, 30.0, 40.0, 50.0]
        )
        .unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let key_col = gpu_df.column_by_name("cat").unwrap();
        let val_col = gpu_df.column_by_name("val").unwrap();

        let result = gpu_df
            .groupby(
                vec![key_col],
                vec!["cat".to_string()],
                vec![val_col],
                vec![(0, AggregationKind::Sum)],
                vec!["val_sum".to_string()],
                true, // maintain_order
            )
            .unwrap();

        assert_eq!(result.height(), 3); // 3 groups
        let back = result.to_polars().unwrap();
        assert_eq!(back.width(), 2); // cat + val_sum

        // Verify actual sum values (sorted by key)
        let cats = i32_values(&back, "cat");
        let sums: Vec<f64> = back
            .column("val_sum")
            .unwrap()
            .f64()
            .unwrap()
            .into_no_null_iter()
            .collect();
        assert_eq!(cats, vec![1, 2, 3]);
        assert_eq!(sums, vec![30.0, 70.0, 50.0]);
    }

    /// Group-by with two aggregations yields one output column per aggregation.
    ///
    /// Only the result shape is checked; `maintain_order` is `false`, so row order
    /// is not asserted.
    #[test]
    fn gpu_frame_groupby_mean_count() {
        use cudf::aggregation::AggregationKind;

        let df = df!(
            "grp" => [1i32, 1, 2, 2],
            "a" => [10.0f64, 20.0, 30.0, 40.0],
            "b" => [100i32, 200, 300, 400]
        )
        .unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let key_col = gpu_df.column_by_name("grp").unwrap();
        let a_col = gpu_df.column_by_name("a").unwrap();
        let b_col = gpu_df.column_by_name("b").unwrap();

        let result = gpu_df
            .groupby(
                vec![key_col],
                vec!["grp".to_string()],
                vec![a_col, b_col],
                vec![(0, AggregationKind::Mean), (1, AggregationKind::Count)],
                vec!["a_mean".to_string(), "b_count".to_string()],
                false, // maintain_order
            )
            .unwrap();

        assert_eq!(result.height(), 2); // 2 groups
        assert_eq!(result.width(), 3); // grp + a_mean + b_count
    }

    // ── M3: Distinct tests ──

    /// `distinct` over all columns removes fully duplicated rows.
    #[test]
    fn gpu_frame_distinct_all_columns() {
        use cudf::stream_compaction::DuplicateKeepOption;

        let df = df!(
            "x" => [1i32, 2, 2, 3, 3, 3],
            "y" => [10i32, 20, 20, 30, 30, 30]
        )
        .unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let result = gpu_df
            .distinct(None, DuplicateKeepOption::First, false)
            .unwrap();
        assert_eq!(result.height(), 3); // 3 unique rows

        // Verify actual distinct values (sort by x to handle non-deterministic order)
        let x_col = result.column_by_name("x").unwrap();
        let sorted = result
            .sort_by_key(
                vec![x_col],
                &[cudf::sorting::SortOrder::Ascending],
                &[cudf::sorting::NullOrder::After],
            )
            .unwrap();
        let back = sorted.to_polars().unwrap();
        let xs = i32_values(&back, "x");
        let ys = i32_values(&back, "y");
        assert_eq!(xs, vec![1, 2, 3]);
        assert_eq!(ys, vec![10, 20, 30]);
    }

    /// `distinct` on a column subset dedups by that subset but keeps every column.
    #[test]
    fn gpu_frame_distinct_subset() {
        use cudf::stream_compaction::DuplicateKeepOption;

        let df = df!(
            "x" => [1i32, 1, 2, 2],
            "y" => [10i32, 20, 30, 40]
        )
        .unwrap();
        let gpu_df = GpuDataFrame::from_polars(&df).unwrap();

        let result = gpu_df
            .distinct(Some(&["x"]), DuplicateKeepOption::First, true)
            .unwrap();
        assert_eq!(result.height(), 2); // 2 unique x values
        assert_eq!(result.width(), 2); // both columns preserved
    }
}
545
546#[cfg(test)]
547mod m4_tests {
548    use crate::error as gpu_error;
549    use crate::gpu_frame::GpuDataFrame;
550    use polars_core::prelude::*;
551
552    // ── Join tests ──
553
554    #[test]
555    #[cfg(feature = "gpu-tests")]
556    fn test_inner_join() {
557        // Left: id=[1,2,3,4], val=[10,20,30,40]
558        // Right: id=[2,3,5], score=[200,300,500]
559        // Inner join on id: expect matches at id=2,3
560        let left_df = df!(
561            "id" => [1i32, 2, 3, 4],
562            "val" => [10i32, 20, 30, 40]
563        )
564        .unwrap();
565        let right_df = df!(
566            "id" => [2i32, 3, 5],
567            "score" => [200i32, 300, 500]
568        )
569        .unwrap();
570
571        let left_gpu = GpuDataFrame::from_polars(&left_df).unwrap();
572        let right_gpu = GpuDataFrame::from_polars(&right_df).unwrap();
573
574        // Build key tables
575        let left_keys = vec![left_gpu.column_by_name("id").unwrap()];
576        let right_keys = vec![right_gpu.column_by_name("id").unwrap()];
577        let left_keys_table = gpu_error::gpu_result(cudf::Table::new(left_keys)).unwrap();
578        let right_keys_table = gpu_error::gpu_result(cudf::Table::new(right_keys)).unwrap();
579
580        let result = gpu_error::gpu_result(left_keys_table.inner_join(&right_keys_table)).unwrap();
581        let left_gathered =
582            gpu_error::gpu_result(left_gpu.inner_table().gather(&result.left_indices)).unwrap();
583        let right_gathered =
584            gpu_error::gpu_result(right_gpu.inner_table().gather(&result.right_indices)).unwrap();
585
586        // Build combined result
587        let mut cols = Vec::new();
588        let mut names = Vec::new();
589        for i in 0..left_gathered.num_columns() {
590            cols.push(gpu_error::gpu_result(left_gathered.column(i)).unwrap());
591            names.push(left_gpu.names()[i].clone());
592        }
593        for i in 0..right_gathered.num_columns() {
594            let rname = &right_gpu.names()[i];
595            if names.contains(rname) {
596                names.push(format!("{}_right", rname));
597            } else {
598                names.push(rname.clone());
599            }
600            cols.push(gpu_error::gpu_result(right_gathered.column(i)).unwrap());
601        }
602
603        let joined = GpuDataFrame::from_columns(cols, names).unwrap();
604        assert_eq!(joined.height(), 2); // 2 matching rows
605
606        let back = joined.to_polars().unwrap();
607        // Sort by id for deterministic check
608        let back = back.sort(["id"], Default::default()).unwrap();
609        let ids: Vec<i32> = back
610            .column("id")
611            .unwrap()
612            .i32()
613            .unwrap()
614            .into_no_null_iter()
615            .collect();
616        let vals: Vec<i32> = back
617            .column("val")
618            .unwrap()
619            .i32()
620            .unwrap()
621            .into_no_null_iter()
622            .collect();
623        let scores: Vec<i32> = back
624            .column("score")
625            .unwrap()
626            .i32()
627            .unwrap()
628            .into_no_null_iter()
629            .collect();
630        assert_eq!(ids, vec![2, 3]);
631        assert_eq!(vals, vec![20, 30]);
632        assert_eq!(scores, vec![200, 300]);
633    }
634
635    #[test]
636    #[cfg(feature = "gpu-tests")]
637    fn test_left_join() {
638        let left_df = df!(
639            "id" => [1i32, 2, 3, 4],
640            "val" => [10i32, 20, 30, 40]
641        )
642        .unwrap();
643        let right_df = df!(
644            "id" => [2i32, 3, 5],
645            "score" => [200i32, 300, 500]
646        )
647        .unwrap();
648
649        let left_gpu = GpuDataFrame::from_polars(&left_df).unwrap();
650        let right_gpu = GpuDataFrame::from_polars(&right_df).unwrap();
651
652        let left_keys = vec![left_gpu.column_by_name("id").unwrap()];
653        let right_keys = vec![right_gpu.column_by_name("id").unwrap()];
654        let left_keys_table = gpu_error::gpu_result(cudf::Table::new(left_keys)).unwrap();
655        let right_keys_table = gpu_error::gpu_result(cudf::Table::new(right_keys)).unwrap();
656
657        let result = gpu_error::gpu_result(left_keys_table.left_join(&right_keys_table)).unwrap();
658        let left_gathered =
659            gpu_error::gpu_result(left_gpu.inner_table().gather(&result.left_indices)).unwrap();
660        let right_gathered =
661            gpu_error::gpu_result(right_gpu.inner_table().gather(&result.right_indices)).unwrap();
662
663        // Left join: all 4 left rows preserved
664        assert_eq!(left_gathered.num_rows(), 4);
665        assert_eq!(right_gathered.num_rows(), 4);
666
667        // Verify actual values: combine left+right gathered, then sort by id
668        let left_result_df = GpuDataFrame::from_table(left_gathered, left_gpu.names().to_vec());
669        let right_result_df = GpuDataFrame::from_table(right_gathered, right_gpu.names().to_vec());
670        let left_back = left_result_df.to_polars().unwrap();
671        let right_back = right_result_df.to_polars().unwrap();
672
673        // Combine left and right columns into one DataFrame, then sort
674        let mut combined = left_back.clone();
675        combined
676            .with_column(right_back.column("score").unwrap().clone())
677            .unwrap();
678        let combined = combined.sort(["id"], Default::default()).unwrap();
679
680        let ids: Vec<i32> = combined
681            .column("id")
682            .unwrap()
683            .i32()
684            .unwrap()
685            .into_no_null_iter()
686            .collect();
687        assert_eq!(ids, vec![1, 2, 3, 4]); // all left rows preserved
688        let vals: Vec<i32> = combined
689            .column("val")
690            .unwrap()
691            .i32()
692            .unwrap()
693            .into_no_null_iter()
694            .collect();
695        assert_eq!(vals, vec![10, 20, 30, 40]);
696        let scores: Vec<Option<i32>> = combined
697            .column("score")
698            .unwrap()
699            .i32()
700            .unwrap()
701            .into_iter()
702            .collect();
703        // id=1 -> None, id=2 -> 200, id=3 -> 300, id=4 -> None
704        assert_eq!(scores, vec![None, Some(200), Some(300), None]);
705    }
706
707    #[test]
708    #[cfg(feature = "gpu-tests")]
709    fn test_left_semi_join() {
710        let left_df = df!(
711            "id" => [1i32, 2, 3, 4],
712            "val" => [10i32, 20, 30, 40]
713        )
714        .unwrap();
715        let right_df = df!(
716            "id" => [2i32, 3, 5]
717        )
718        .unwrap();
719
720        let left_gpu = GpuDataFrame::from_polars(&left_df).unwrap();
721        let right_gpu = GpuDataFrame::from_polars(&right_df).unwrap();
722
723        let left_keys = vec![left_gpu.column_by_name("id").unwrap()];
724        let right_keys = vec![right_gpu.column_by_name("id").unwrap()];
725        let left_keys_table = gpu_error::gpu_result(cudf::Table::new(left_keys)).unwrap();
726        let right_keys_table = gpu_error::gpu_result(cudf::Table::new(right_keys)).unwrap();
727
728        let result =
729            gpu_error::gpu_result(left_keys_table.left_semi_join(&right_keys_table)).unwrap();
730        let gathered =
731            gpu_error::gpu_result(left_gpu.inner_table().gather(&result.left_indices)).unwrap();
732        let gathered_df = GpuDataFrame::from_table(gathered, left_gpu.names().to_vec());
733
734        // Sort for deterministic output
735        let id_col = gathered_df.column_by_name("id").unwrap();
736        let sorted = gathered_df
737            .sort_by_key(
738                vec![id_col],
739                &[cudf::sorting::SortOrder::Ascending],
740                &[cudf::sorting::NullOrder::After],
741            )
742            .unwrap();
743
744        assert_eq!(sorted.height(), 2); // only rows with id 2, 3
745        let back = sorted.to_polars().unwrap();
746        let ids: Vec<i32> = back
747            .column("id")
748            .unwrap()
749            .i32()
750            .unwrap()
751            .into_no_null_iter()
752            .collect();
753        assert_eq!(ids, vec![2, 3]);
754    }
755
756    #[test]
757    #[cfg(feature = "gpu-tests")]
758    fn test_left_anti_join() {
759        let left_df = df!(
760            "id" => [1i32, 2, 3, 4],
761            "val" => [10i32, 20, 30, 40]
762        )
763        .unwrap();
764        let right_df = df!(
765            "id" => [2i32, 3, 5]
766        )
767        .unwrap();
768
769        let left_gpu = GpuDataFrame::from_polars(&left_df).unwrap();
770        let right_gpu = GpuDataFrame::from_polars(&right_df).unwrap();
771
772        let left_keys = vec![left_gpu.column_by_name("id").unwrap()];
773        let right_keys = vec![right_gpu.column_by_name("id").unwrap()];
774        let left_keys_table = gpu_error::gpu_result(cudf::Table::new(left_keys)).unwrap();
775        let right_keys_table = gpu_error::gpu_result(cudf::Table::new(right_keys)).unwrap();
776
777        let result =
778            gpu_error::gpu_result(left_keys_table.left_anti_join(&right_keys_table)).unwrap();
779        let gathered =
780            gpu_error::gpu_result(left_gpu.inner_table().gather(&result.left_indices)).unwrap();
781        let gathered_df = GpuDataFrame::from_table(gathered, left_gpu.names().to_vec());
782
783        let id_col = gathered_df.column_by_name("id").unwrap();
784        let sorted = gathered_df
785            .sort_by_key(
786                vec![id_col],
787                &[cudf::sorting::SortOrder::Ascending],
788                &[cudf::sorting::NullOrder::After],
789            )
790            .unwrap();
791
792        assert_eq!(sorted.height(), 2); // rows with id 1, 4 (not in right)
793        let back = sorted.to_polars().unwrap();
794        let ids: Vec<i32> = back
795            .column("id")
796            .unwrap()
797            .i32()
798            .unwrap()
799            .into_no_null_iter()
800            .collect();
801        assert_eq!(ids, vec![1, 4]);
802    }
803
804    #[test]
805    #[cfg(feature = "gpu-tests")]
806    fn test_cross_join() {
807        let left_df = df!(
808            "a" => [1i32, 2]
809        )
810        .unwrap();
811        let right_df = df!(
812            "b" => [10i32, 20, 30]
813        )
814        .unwrap();
815
816        let left_gpu = GpuDataFrame::from_polars(&left_df).unwrap();
817        let right_gpu = GpuDataFrame::from_polars(&right_df).unwrap();
818
819        let cross =
820            gpu_error::gpu_result(left_gpu.inner_table().cross_join(right_gpu.inner_table()))
821                .unwrap();
822        assert_eq!(cross.num_rows(), 6); // 2 * 3
823        assert_eq!(cross.num_columns(), 2); // a, b
824
825        // Verify actual values
826        let names = vec!["a".to_string(), "b".to_string()];
827        let result_df = GpuDataFrame::from_table(cross, names);
828        let back = result_df.to_polars().unwrap();
829        let back = back.sort(["a", "b"], Default::default()).unwrap();
830        let a_vals: Vec<i32> = back
831            .column("a")
832            .unwrap()
833            .i32()
834            .unwrap()
835            .into_no_null_iter()
836            .collect();
837        let b_vals: Vec<i32> = back
838            .column("b")
839            .unwrap()
840            .i32()
841            .unwrap()
842            .into_no_null_iter()
843            .collect();
844        // Cross join: each left row x each right row, sorted by (a, b)
845        assert_eq!(a_vals, vec![1, 1, 1, 2, 2, 2]);
846        assert_eq!(b_vals, vec![10, 20, 30, 10, 20, 30]);
847    }
848
849    // ── Union (vertical concat) test ──
850
851    #[test]
852    #[cfg(feature = "gpu-tests")]
853    fn test_union_concat() {
854        let df1 = df!(
855            "x" => [1i32, 2, 3],
856            "y" => [10i32, 20, 30]
857        )
858        .unwrap();
859        let df2 = df!(
860            "x" => [4i32, 5],
861            "y" => [40i32, 50]
862        )
863        .unwrap();
864
865        let gpu1 = GpuDataFrame::from_polars(&df1).unwrap();
866        let gpu2 = GpuDataFrame::from_polars(&df2).unwrap();
867
868        let table_refs = vec![gpu1.inner_table(), gpu2.inner_table()];
869        let concatenated =
870            gpu_error::gpu_result(cudf::concatenate::concatenate_tables(&table_refs)).unwrap();
871        let result = GpuDataFrame::from_table(concatenated, gpu1.names().to_vec());
872
873        assert_eq!(result.height(), 5);
874        assert_eq!(result.width(), 2);
875
876        let back = result.to_polars().unwrap();
877        let xs: Vec<i32> = back
878            .column("x")
879            .unwrap()
880            .i32()
881            .unwrap()
882            .into_no_null_iter()
883            .collect();
884        let ys: Vec<i32> = back
885            .column("y")
886            .unwrap()
887            .i32()
888            .unwrap()
889            .into_no_null_iter()
890            .collect();
891        assert_eq!(xs, vec![1, 2, 3, 4, 5]);
892        assert_eq!(ys, vec![10, 20, 30, 40, 50]);
893    }
894
895    // ── HConcat (horizontal concat) test ──
896
897    #[test]
898    #[cfg(feature = "gpu-tests")]
899    fn test_hconcat() {
900        let df1 = df!("a" => [1i32, 2, 3]).unwrap();
901        let df2 = df!("b" => [10i32, 20, 30]).unwrap();
902
903        let gpu1 = GpuDataFrame::from_polars(&df1).unwrap();
904        let gpu2 = GpuDataFrame::from_polars(&df2).unwrap();
905
906        // Combine columns
907        let mut all_cols = Vec::new();
908        let mut all_names = Vec::new();
909        for i in 0..gpu1.width() {
910            all_cols.push(gpu1.column(i).unwrap());
911            all_names.push(gpu1.names()[i].clone());
912        }
913        for i in 0..gpu2.width() {
914            all_cols.push(gpu2.column(i).unwrap());
915            all_names.push(gpu2.names()[i].clone());
916        }
917
918        let combined = GpuDataFrame::from_columns(all_cols, all_names).unwrap();
919        assert_eq!(combined.width(), 2);
920        assert_eq!(combined.height(), 3);
921
922        let back = combined.to_polars().unwrap();
923        let a_vals: Vec<i32> = back
924            .column("a")
925            .unwrap()
926            .i32()
927            .unwrap()
928            .into_no_null_iter()
929            .collect();
930        let b_vals: Vec<i32> = back
931            .column("b")
932            .unwrap()
933            .i32()
934            .unwrap()
935            .into_no_null_iter()
936            .collect();
937        assert_eq!(a_vals, vec![1, 2, 3]);
938        assert_eq!(b_vals, vec![10, 20, 30]);
939    }
940
941    // ── Ternary (if-then-else / copy_if_else) test ──
942
943    #[test]
944    #[cfg(feature = "gpu-tests")]
945    fn test_ternary_copy_if_else() {
946        // mask = [true, false, true, false, true]
947        // truthy = [10, 20, 30, 40, 50]
948        // falsy = [100, 200, 300, 400, 500]
949        // result = [10, 200, 30, 400, 50]
950        let mask =
951            gpu_error::gpu_result(cudf::Column::from_slice(&[true, false, true, false, true]))
952                .unwrap();
953        let truthy =
954            gpu_error::gpu_result(cudf::Column::from_slice(&[10i32, 20, 30, 40, 50])).unwrap();
955        let falsy =
956            gpu_error::gpu_result(cudf::Column::from_slice(&[100i32, 200, 300, 400, 500])).unwrap();
957
958        let result = gpu_error::gpu_result(truthy.copy_if_else(&falsy, &mask)).unwrap();
959        let vals: Vec<i32> = gpu_error::gpu_result(result.to_vec()).unwrap();
960        assert_eq!(vals, vec![10, 200, 30, 400, 50]);
961    }
962
    // ── Function tests (IsNull, IsNotNull, Abs, Not) ──
964
965    #[test]
966    #[cfg(feature = "gpu-tests")]
967    fn test_is_null() {
968        let opts: Vec<Option<i32>> = vec![Some(1), None, Some(3), None, Some(5)];
969        let col = gpu_error::gpu_result(cudf::Column::from_optional_i32(&opts)).unwrap();
970        let result = gpu_error::gpu_result(col.is_null()).unwrap();
971        let vals: Vec<bool> = gpu_error::gpu_result(result.to_vec()).unwrap();
972        assert_eq!(vals, vec![false, true, false, true, false]);
973    }
974
975    #[test]
976    #[cfg(feature = "gpu-tests")]
977    fn test_is_valid() {
978        let opts: Vec<Option<i32>> = vec![Some(1), None, Some(3), None, Some(5)];
979        let col = gpu_error::gpu_result(cudf::Column::from_optional_i32(&opts)).unwrap();
980        let result = gpu_error::gpu_result(col.is_valid()).unwrap();
981        let vals: Vec<bool> = gpu_error::gpu_result(result.to_vec()).unwrap();
982        assert_eq!(vals, vec![true, false, true, false, true]);
983    }
984
985    #[test]
986    #[cfg(feature = "gpu-tests")]
987    fn test_abs() {
988        use cudf::unary::UnaryOp;
989
990        let col = gpu_error::gpu_result(cudf::Column::from_slice(&[-3i32, -1, 0, 2, 5])).unwrap();
991        let result = gpu_error::gpu_result(col.unary_op(UnaryOp::Abs)).unwrap();
992        let vals: Vec<i32> = gpu_error::gpu_result(result.to_vec()).unwrap();
993        assert_eq!(vals, vec![3, 1, 0, 2, 5]);
994    }
995
996    #[test]
997    #[cfg(feature = "gpu-tests")]
998    fn test_not() {
999        use cudf::unary::UnaryOp;
1000
1001        let col = gpu_error::gpu_result(cudf::Column::from_slice(&[true, false, true])).unwrap();
1002        let result = gpu_error::gpu_result(col.unary_op(UnaryOp::Not)).unwrap();
1003        let vals: Vec<bool> = gpu_error::gpu_result(result.to_vec()).unwrap();
1004        assert_eq!(vals, vec![false, true, false]);
1005    }
1006}