1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
//! Page-level pruning utilities and exactness tracking.
//!
//! # Page-Level Pruning
//!
//! Page-level pruning uses column index (page statistics) to skip individual pages
//! within a row group, providing finer-grained I/O reduction than row-group-only pruning.
//!
//! # Exactness Tracking
//!
//! The `PagePruning` struct tracks whether a page selection is "exact" (definite) or
//! "inexact" (conservative):
//!
//! - **Exact** (`exact = true`): Every page is definitively True or False
//!   - All pages returned `TriState::True` (keep) or `TriState::False` (skip)
//!   - No Unknown pages that were conservatively kept
//!   - Safe to invert for NOT operations
//!
//! - **Inexact** (`exact = false`): Some pages are uncertain or some predicates are unsupported
//!   - Some pages returned `TriState::Unknown` (conservatively kept)
//!   - Or some predicates in a conjunction couldn't be evaluated
//!   - NOT inversion is unsafe (would incorrectly prune Unknown pages)
//!
//! # Example
//!
//! ```text
//! Predicate: a > 50
//! Page 0: min=1, max=10 → TriState::False (all < 50, skip)
//! Page 1: min=45, max=55 → TriState::Unknown (might contain > 50, keep)
//! Page 2: min=60, max=70 → TriState::True (all > 50, keep)
//!
//! Result: exact = false (Page 1 is Unknown)
//! NOT inversion: Rejected (would incorrectly skip Page 1's >50 values)
//! ```
use Range;
use ;
/// Page-level selection with exactness tracking.
///
/// Wraps a `RowSelection` with a flag indicating whether the selection is definite
/// (all pages are True/False) or conservative (some pages are Unknown).
///
/// # Exactness Semantics
///
/// - `exact = true`: All pages in the selection are definitively True or False
///   - No `TriState::Unknown` pages
///   - Safe to invert for NOT operations
///   - Indicates high-confidence pruning decisions
///
/// - `exact = false`: Some pages are uncertain or predicates are unsupported
///   - Contains `TriState::Unknown` pages (conservatively kept)
///   - Or some predicates in a conjunction couldn't be evaluated
///   - NOT inversion is disabled (would cause incorrect pruning)
///
/// # Usage
///
/// ```text
/// // Exact selection (all pages are True/False)
/// let exact_sel = PagePruning::new(selection, true);
/// let inverted = page::invert_selection(&exact_sel.selection, total_rows); // OK
///
/// // Inexact selection (some Unknown pages)
/// let inexact_sel = PagePruning::new(selection, false);
/// // NOT would check: if !inexact_sel.exact { return None } // Rejected
/// ```
pub
/// Builds row ranges for each page from offset index metadata.
///
/// Converts Parquet page locations (first row index per page) into
/// Rust ranges representing the row span of each page.
///
/// # Returns
///
/// - `Some(Vec<Range<usize>>)` - Row ranges for each page
/// - `None` - If offset index is empty
///
/// # Example
///
/// ```text
/// Input: page_locations = [
///     {first_row_index: 0},
///     {first_row_index: 100},
///     {first_row_index: 250}
/// ], row_group_rows = 300
///
/// Output: [0..100, 100..250, 250..300]
/// ```
pub
/// Inverts a row selection by flipping all select/skip selectors.
///
/// Used for NOT operations on page-level pruning. This is **only safe** when the
/// input selection is exact (all pages are definitively True/False, no Unknown pages).
///
/// # Arguments
///
/// * `selection` - The row selection to invert
/// * `total_rows` - Total number of rows in the row group
///
/// # Returns
///
/// - `Some(RowSelection)` - Inverted selection where skip↔select are flipped
/// - `None` - If the selection is invalid (covered_rows > total_rows)
///
/// # Inversion Logic
///
/// ```text
/// Original: [select(3), skip(3)]
/// Inverted: [skip(3), select(3)]
/// ```
///
/// # Edge Cases
///
/// 1. **Partial coverage** (`covered_rows < total_rows`):
///    - Trailing rows are assumed to be skipped in the original
///    - A skip selector is added for the trailing rows before inversion
///
///    ```text
///    Input: [select(3), skip(2)], total_rows=6
///    Step 1: Add trailing skip: [select(3), skip(2), skip(1)]
///    Step 2: Invert: [skip(3), select(2), select(1)]
///    ```
///
/// 2. **Over-coverage** (`covered_rows > total_rows`):
///    - Invalid state, returns None
///    - Defensive check to prevent logic errors
///
/// 3. **Exact coverage** (`covered_rows == total_rows`):
///    - No padding needed, directly inverts selectors
///
/// # Safety for NOT
///
/// This function should only be called when `exact = true`:
///
/// ```text
/// // Safe: All pages are True/False
/// let exact_sel = PagePruning::new(selection, true);
/// let inverted = invert_selection(&exact_sel.selection, total_rows); // OK
///
/// // Unsafe: Some pages are Unknown (conservatively kept)
/// let inexact_sel = PagePruning::new(selection, false);
/// // Caller must check: if !inexact_sel.exact { return None }
/// ```
///
/// If the selection contains Unknown pages that were conservatively kept (select),
/// inversion would incorrectly skip them, causing false negatives.
pub