1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#pragma once
#include <cudf/null_mask.hpp>
#include <cudf/column/column.hpp>
#include <cudf/types.hpp>
#include <rmm/device_buffer.hpp>
#include <memory>
#include <vector>
#include "rust/cxx.h"
#include "column_shim.h"
namespace cudf_shims {
// ── Null Mask Utilities ───────────────────────────────────────
/// Opaque wrapper around rmm::device_buffer for null masks.
struct OwnedDeviceBuffer {
rmm::device_buffer inner;
explicit OwnedDeviceBuffer(rmm::device_buffer buf)
: inner(std::move(buf)) {}
/// Size of the buffer in bytes.
int32_t size_bytes() const {
return static_cast<int32_t>(inner.size());
}
};
// ── Existing functions (Phase 1) ─────────────────────────────
/// Count the number of valid (non-null) elements in a column.
///
/// @param col Column to inspect; borrowed through a const reference.
/// @return The count of valid elements as a signed 32-bit integer.
int32_t valid_count(const OwnedColumn& col);
/// Return a copy of the column with its null mask removed.
///
/// @param col Source column; borrowed through a const reference, so the
///            input itself is not modified through this parameter.
/// @return Owning pointer to the newly produced column.
std::unique_ptr<OwnedColumn> set_all_valid(const OwnedColumn& col);
// ── New functions (Phase 2) ──────────────────────────────────
/// Create a null mask device buffer.
///
/// @param size  Size of the mask to create.
///              NOTE(review): presumably the number of elements (bits) the
///              mask must cover rather than a byte count — confirm against
///              the definition.
/// @param state Integer encoding of the initial mask state:
///              0=UNALLOCATED, 1=UNINITIALIZED, 2=ALL_VALID, 3=ALL_NULL.
///              NOTE(review): handling of values outside 0..3 is not visible
///              from this header.
/// @return Owning pointer to the wrapper that holds the created buffer.
std::unique_ptr<OwnedDeviceBuffer> create_null_mask(
int32_t size, int32_t state);
/// Count null values in a column.
///
/// @param col Column to inspect; borrowed through a const reference.
/// @return The number of null elements as a signed 32-bit integer.
int32_t null_count_column(const OwnedColumn& col);
/// Compute the number of bytes needed for a bitmask of given size.
///
/// @param number_of_bits Number of bits the bitmask must be able to hold.
/// @return Required allocation size in bytes.
///         NOTE(review): presumably includes any padding libcudf applies to
///         bitmask allocations — confirm against the definition.
int32_t bitmask_allocation_size(int32_t number_of_bits);
/// Copy a column's null mask to host.
///
/// Unlike `copy_bitmask_to_host`, which returns a freshly built vector, this
/// variant writes into storage supplied by the caller.
///
/// @param col Column whose null mask is read; borrowed through a const reference.
/// @param out Mutable byte slice that receives the mask bytes.
///            NOTE(review): the required length of `out`, and the behaviour
///            when `col` has no null mask or `out` is too small, are not
///            visible from this header — confirm against the definition.
void copy_null_mask_to_host(
const OwnedColumn& col,
rust::Slice<uint8_t> out);
/// Create a new column with a null mask set from host-side bytes.
///
/// @param col        Source column; borrowed through a const reference.
/// @param mask       Read-only slice of bytes holding the mask to apply.
/// @param null_count Null count supplied by the caller.
///                   NOTE(review): presumably must match the number of unset
///                   bits in `mask`; whether it is validated is not visible
///                   from this header.
/// @return Owning pointer to the newly created column.
std::unique_ptr<OwnedColumn> set_null_mask_from_host(
const OwnedColumn& col,
rust::Slice<const uint8_t> mask,
int32_t null_count);
/// Set bits in a device bitmask, starting from the bitmask of the given column.
/// The input is taken by const reference; the result is returned as a new
/// column carrying the modified bitmask.
///
/// @param col       Column whose bitmask serves as the starting point.
/// @param begin_bit Index of the first bit in the range.
/// @param end_bit   Index bounding the end of the range.
///                  NOTE(review): presumably exclusive, i.e. the range is
///                  [begin_bit, end_bit) — confirm against the definition.
/// @param valid     Value written to every bit in the range.
///                  NOTE(review): presumably `true` marks elements as valid
///                  and `false` as null — confirm.
/// @return Owning pointer to the new column.
std::unique_ptr<OwnedColumn> set_null_mask_range(
const OwnedColumn& col,
int32_t begin_bit,
int32_t end_bit,
bool valid);
/// Copy a column's bitmask to host bytes.
/// Returns empty vec if column has no null mask.
///
/// @param col Column whose bitmask is read; borrowed through a const reference.
/// @return Vector of bytes holding the copied bitmask, returned by value.
rust::Vec<uint8_t> copy_bitmask_to_host(const OwnedColumn& col);
/// Builder for collecting column views for bitmask AND/OR operations.
struct BitmaskBuilder {
std::vector<cudf::column_view> views;
void add_column(const OwnedColumn& col) {
views.push_back(col.view());
}
int32_t num_columns() const {
return static_cast<int32_t>(views.size());
}
};
/// Result of a bitmask AND/OR operation.
///
/// Plain aggregate that pairs the resulting mask bytes with a null count.
/// The accessor methods expose both values through const member calls.
struct BitmaskResult {
  /// Bytes of the resulting mask.
  rust::Vec<uint8_t> mask;

  /// Null count associated with `mask`. Zero-initialised so that a
  /// default-initialised result never exposes an indeterminate value through
  /// `get_null_count()`; aggregate initialisation with explicit values is
  /// unaffected.
  int32_t null_count{0};

  /// Returns the mask bytes by value (a copy of the stored vector).
  rust::Vec<uint8_t> get_mask() const { return mask; }

  /// Returns the stored null count.
  int32_t get_null_count() const { return null_count; }
};
/// Create a new BitmaskBuilder.
///
/// @return Owning pointer to the builder.
///         NOTE(review): presumably starts with no columns collected —
///         confirm against the definition.
std::unique_ptr<BitmaskBuilder> bitmask_builder_new();
/// Bitwise AND of null masks from multiple columns.
///
/// @param builder Builder holding the column views to combine; borrowed
///                through a const reference.
/// @return Owning pointer to a BitmaskResult carrying the combined mask
///         bytes and a null count.
///         NOTE(review): behaviour for an empty builder, or for columns
///         without a null mask, is not visible from this header.
std::unique_ptr<BitmaskResult> bitmask_and(const BitmaskBuilder& builder);
/// Bitwise OR of null masks from multiple columns.
///
/// @param builder Builder holding the column views to combine; borrowed
///                through a const reference.
/// @return Owning pointer to a BitmaskResult carrying the combined mask
///         bytes and a null count.
///         NOTE(review): behaviour for an empty builder, or for columns
///         without a null mask, is not visible from this header.
std::unique_ptr<BitmaskResult> bitmask_or(const BitmaskBuilder& builder);
/// Compute the null count for a given mask state and size.
///
/// @param state Integer encoding of the mask state.
///              NOTE(review): presumably the same encoding accepted by
///              `create_null_mask` (0=UNALLOCATED, 1=UNINITIALIZED,
///              2=ALL_VALID, 3=ALL_NULL) — confirm against the definition.
/// @param size  Size the mask covers.
///              NOTE(review): presumably a number of elements — confirm.
/// @return The null count as a signed 32-bit integer.
int32_t state_null_count(int32_t state, int32_t size);
/// Compute the number of bitmask_type words needed for the given number of bits.
///
/// @param number_of_bits Number of bits the bitmask must be able to hold.
/// @return Word count as a signed 32-bit integer.
int32_t num_bitmask_words(int32_t number_of_bits);
/// Find the index of the first set (valid) bit in the column's null mask.
/// Returns 0 for non-nullable non-empty columns (all elements are valid).
/// Returns -1 if the column is empty or all null.
/// NOTE: Based on libcudf v26.04.00 API (PR #21207).
///
/// @param col Column whose null mask is searched; borrowed through a const reference.
/// @return Index of the first valid element, or -1 as described above.
int32_t index_of_first_set_bit(const OwnedColumn& col);
} // namespace cudf_shims