Skip to main content

exoware_sql/
lib.rs

1pub mod proto;
2pub mod prune;
3pub mod server;
4
5mod aggregate;
6mod builder;
7mod codec;
8mod diagnostics;
9mod filter;
10mod predicate;
11mod scan;
12mod schema;
13mod types;
14mod writer;
15
16pub use schema::KvSchema;
17pub use server::{sql_connect_stack, SqlConnect, SqlServer};
18pub use types::default_orders_index_specs;
19pub use types::{
20    CellValue, IndexBackfillEvent, IndexBackfillOptions, IndexBackfillReport, IndexLayout,
21    IndexSpec, TableColumnConfig,
22};
23pub use writer::{BatchReceipt, BatchWriter, PreparedBatch, TableWriter};
24
25#[cfg(test)]
26mod tests {
27    #![allow(refining_impl_trait)]
28
29    use super::aggregate::*;
30    use super::builder::*;
31    use super::codec::*;
32    use super::diagnostics::*;
33    use super::filter::*;
34    use super::predicate::*;
35    use super::scan::*;
36    use super::types::*;
37    use super::writer::*;
38    use super::*;
39    use commonware_codec::Encode;
40    use datafusion::arrow::array::{Float64Array, Int64Array, LargeStringArray, StringViewArray};
41    use datafusion::arrow::datatypes::{i256, DataType, TimeUnit};
42    use datafusion::arrow::record_batch::RecordBatch;
43    use datafusion::common::ScalarValue;
44    use datafusion::logical_expr::{Expr, Operator};
45    use datafusion::physical_plan::ExecutionPlan;
46    use datafusion::prelude::SessionContext;
47    use exoware_sdk::keys::{Key, KeyCodec};
48    use exoware_sdk::kv_codec::{
49        canonicalize_reduced_group_values, decode_stored_row, encode_reduced_group_key,
50        eval_predicate, KvReducedValue, StoredRow,
51    };
52    use exoware_sdk::{RangeReduceOp, RangeReduceRequest, StoreBatchUpload, StoreClient};
53    use std::collections::{BTreeMap, HashSet};
54    use std::ops::Bound::{Included, Unbounded};
55    use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering};
56    use std::sync::{Arc, Mutex};
57    use std::time::Duration;
58
59    use axum::Router;
60    use bytes::Bytes;
61    use connectrpc::{Chain, ConnectError, ConnectRpcService, RequestContext as Context};
62    use exoware_sdk::connect_compression_registry;
63    use exoware_sdk::kv_codec::{eval_expr, expr_needs_value};
64    use exoware_sdk::store::common::v1::KvEntry as ProtoKvEntry;
65    use exoware_sdk::store::ingest::v1::{
66        PutResponse as ProtoPutResponse, Service as IngestService,
67        ServiceServer as IngestServiceServer,
68    };
69    use exoware_sdk::store::query::v1::{
70        GetManyEntry as ProtoGetManyEntry, GetManyFrame as ProtoGetManyFrame,
71        GetResponse as ProtoGetResponse, RangeFrame as ProtoRangeFrame,
72        ReduceResponse as ProtoReduceResponse, Service as QueryService,
73        ServiceServer as QueryServiceServer,
74    };
75    use exoware_sdk::RangeMode;
76    use exoware_sdk::{
77        parse_range_traversal_direction, to_domain_reduce_request, to_proto_optional_reduced_value,
78        to_proto_reduced_value, RangeTraversalDirection, RangeTraversalModeError,
79    };
80    use exoware_sdk::{RangeReduceGroup, RangeReduceResponse, RangeReduceResult};
81    use futures::{stream, TryStreamExt};
82    use tokio::sync::{mpsc, oneshot, Notify};
83
84    /// Assert EXPLAIN text includes the same `query_stats=...` suffix as [`format_query_stats_explain`].
85    fn assert_explain_includes_query_stats_surface(
86        explain: &str,
87        surface: QueryStatsExplainSurface,
88    ) {
89        let expected = format!("query_stats={}", format_query_stats_explain(surface));
90        assert!(
91            explain.contains(&expected),
92            "expected EXPLAIN output to include `{expected}`\n{explain}"
93        );
94    }
95
96    fn simple_int64_model(prefix: u8) -> TableModel {
97        let config = KvTableConfig::new(
98            prefix,
99            vec![TableColumnConfig::new("id", DataType::Int64, false)],
100            vec!["id".to_string()],
101            vec![],
102        )
103        .unwrap();
104        TableModel::from_config(&config).unwrap()
105    }
106
107    fn codec_payload(codec: KeyCodec, key: &Key, offset: usize, len: usize) -> Vec<u8> {
108        codec.read_payload(key, offset, len).expect("codec payload")
109    }
110
111    fn primary_payload(model: &TableModel, key: &Key, offset: usize, len: usize) -> Vec<u8> {
112        codec_payload(model.primary_key_codec, key, offset, len)
113    }
114
115    fn index_payload(spec: &ResolvedIndexSpec, key: &Key, offset: usize, len: usize) -> Vec<u8> {
116        codec_payload(spec.codec, key, offset, len)
117    }
118
119    fn matches_primary_key(table_prefix: u8, key: &Key) -> bool {
120        primary_key_codec(table_prefix)
121            .expect("primary codec")
122            .matches(key)
123    }
124
125    fn matches_secondary_index_key(table_prefix: u8, index_id: u8, key: &Key) -> bool {
126        secondary_index_codec(table_prefix, index_id)
127            .expect("secondary codec")
128            .matches(key)
129    }
130
131    fn test_model() -> (TableModel, Vec<ResolvedIndexSpec>) {
132        let config = KvTableConfig::new(
133            0,
134            vec![
135                TableColumnConfig::new("region", DataType::Utf8, false),
136                TableColumnConfig::new("customer_id", DataType::Int64, false),
137                TableColumnConfig::new("order_id", DataType::Int64, false),
138                TableColumnConfig::new("amount_cents", DataType::Int64, false),
139                TableColumnConfig::new("status", DataType::Utf8, false),
140            ],
141            vec!["order_id".to_string()],
142            vec![
143                IndexSpec::new(
144                    "region_customer",
145                    vec!["region".to_string(), "customer_id".to_string()],
146                )
147                .expect("valid"),
148                IndexSpec::new(
149                    "status_customer",
150                    vec!["status".to_string(), "customer_id".to_string()],
151                )
152                .expect("valid"),
153            ],
154        )
155        .expect("valid config");
156        let model = TableModel::from_config(&config).expect("model");
157        let specs = model
158            .resolve_index_specs(&config.index_specs)
159            .expect("specs");
160        (model, specs)
161    }
162
163    fn zorder_test_model() -> (TableModel, Vec<ResolvedIndexSpec>) {
164        let config = KvTableConfig::new(
165            0,
166            vec![
167                TableColumnConfig::new("x", DataType::Int64, false),
168                TableColumnConfig::new("y", DataType::Int64, false),
169                TableColumnConfig::new("id", DataType::Int64, false),
170                TableColumnConfig::new("value", DataType::Int64, false),
171            ],
172            vec!["id".to_string()],
173            vec![
174                IndexSpec::new("xy_lex", vec!["x".to_string(), "y".to_string()])
175                    .expect("valid")
176                    .with_cover_columns(vec!["value".to_string()]),
177                IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()])
178                    .expect("valid")
179                    .with_cover_columns(vec!["value".to_string()]),
180            ],
181        )
182        .expect("valid config");
183        let model = TableModel::from_config(&config).expect("model");
184        let specs = model
185            .resolve_index_specs(&config.index_specs)
186            .expect("specs");
187        (model, specs)
188    }
189
    /// State shared by the mock ingest and query services; cloning shares the same
    /// underlying map and counters through the `Arc`s.
    #[derive(Clone)]
    struct MockState {
        // In-memory key/value store: written by the mock `put`, read by the query handlers.
        kv: Arc<Mutex<BTreeMap<Key, Bytes>>>,
        // Incremented once per `range` request handled by the mock query service.
        range_calls: Arc<AtomicUsize>,
        // Incremented once per `reduce` request handled by the mock query service.
        range_reduce_calls: Arc<AtomicUsize>,
        // Bumped by one on every `put`; reported in response details and compared against
        // a request's `min_sequence_number`.
        sequence_number: Arc<AtomicU64>,
    }
197
    /// Per-group accumulator used by the mock `reduce` handler when the request has
    /// `group_by` expressions.
    #[derive(Debug)]
    struct MockGroupedReduceState {
        // Group-by values of the first row that created this group.
        group_values: Vec<Option<KvReducedValue>>,
        // One partial aggregate per reducer, in request order.
        states: Vec<PartialAggregateState>,
    }
203
    /// Row extracted for the mock reducer: `(group_by values, reducer input values)`.
    type MockReduceRow = (Vec<Option<KvReducedValue>>, Vec<Option<KvReducedValue>>);
205
206    fn extract_mock_reduce_row(
207        key: &Key,
208        value: &Bytes,
209        request: &RangeReduceRequest,
210    ) -> Option<MockReduceRow> {
211        let needs_value = request
212            .group_by
213            .iter()
214            .chain(
215                request
216                    .reducers
217                    .iter()
218                    .filter_map(|reducer| reducer.expr.as_ref()),
219            )
220            .any(expr_needs_value)
221            || request
222                .filter
223                .as_ref()
224                .is_some_and(exoware_sdk::kv_codec::predicate_needs_value);
225        let archived = if needs_value {
226            decode_stored_row(value.as_ref()).ok()
227        } else {
228            None
229        };
230
231        if let Some(filter) = &request.filter {
232            if !eval_predicate(key, archived.as_ref(), filter).ok()? {
233                return None;
234            }
235        }
236
237        let mut group_values = Vec::with_capacity(request.group_by.len());
238        for expr in &request.group_by {
239            let extracted_value = eval_expr(key, archived.as_ref(), expr).ok()?;
240            group_values.push(extracted_value);
241        }
242        canonicalize_reduced_group_values(&mut group_values);
243
244        let mut reducer_values = Vec::with_capacity(request.reducers.len());
245        for reducer in &request.reducers {
246            let extracted_value = match (&reducer.expr, archived.as_ref()) {
247                (None, _) => None,
248                (Some(expr), _) => eval_expr(key, archived.as_ref(), expr).ok()?,
249            };
250            reducer_values.push(extracted_value);
251        }
252
253        Some((group_values, reducer_values))
254    }
255
256    #[allow(clippy::result_large_err)]
257    fn ensure_min_sequence_number(
258        token: &Arc<AtomicU64>,
259        required: Option<u64>,
260    ) -> Result<(), ConnectError> {
261        let current = token.load(AtomicOrdering::Relaxed);
262        if let Some(required) = required {
263            if current < required {
264                return Err(ConnectError::aborted(format!(
265                    "consistency_not_ready: required={required}, current={current}"
266                )));
267            }
268        }
269        Ok(())
270    }
271
272    fn proto_range_entries_frame(results: Vec<(Key, Vec<u8>)>) -> ProtoRangeFrame {
273        ProtoRangeFrame {
274            results: results
275                .into_iter()
276                .map(|(key, value)| ProtoKvEntry {
277                    key: key.to_vec(),
278                    value: value.into(),
279                    ..Default::default()
280                })
281                .collect(),
282            detail: Some(query_detail(7)).into(),
283            ..Default::default()
284        }
285    }
286
287    fn query_detail(sequence_number: u64) -> exoware_sdk::store::query::v1::Detail {
288        exoware_sdk::store::query::v1::Detail {
289            sequence_number,
290            extra: Default::default(),
291            ..Default::default()
292        }
293    }
294
295    fn final_range_detail_frame(sequence_number: u64) -> ProtoRangeFrame {
296        ProtoRangeFrame {
297            detail: Some(query_detail(sequence_number)).into(),
298            ..Default::default()
299        }
300    }
301
302    fn final_get_many_detail_frame(sequence_number: u64) -> ProtoGetManyFrame {
303        ProtoGetManyFrame {
304            detail: Some(query_detail(sequence_number)).into(),
305            ..Default::default()
306        }
307    }
308
309    #[derive(Clone)]
310    struct MockIngestConnect {
311        state: MockState,
312    }
313
314    impl IngestService for MockIngestConnect {
315        async fn put(
316            &self,
317            _ctx: Context,
318            request: buffa::view::OwnedView<
319                exoware_sdk::store::ingest::v1::PutRequestView<'static>,
320            >,
321        ) -> connectrpc::ServiceResult<ProtoPutResponse> {
322            let mut parsed = Vec::<(Key, Bytes)>::new();
323            let wire = request.bytes();
324            for kv in request.kvs.iter() {
325                parsed.push((wire.slice_ref(kv.key), wire.slice_ref(kv.value)));
326            }
327            let mut guard = self.state.kv.lock().expect("kv mutex poisoned");
328            for (key, value) in parsed.iter() {
329                guard.insert(key.clone(), value.clone());
330            }
331            let seq = self
332                .state
333                .sequence_number
334                .fetch_add(1, AtomicOrdering::SeqCst)
335                + 1;
336            connectrpc::Response::ok(ProtoPutResponse {
337                sequence_number: seq,
338                ..Default::default()
339            })
340        }
341    }
342
    /// Mock query service (`get`, `range`, `get_many`, `reduce`) that reads from the
    /// shared [`MockState`].
    #[derive(Clone)]
    struct MockQueryConnect {
        // Shared store and counters; also held by the mock ingest service.
        state: MockState,
    }
347
348    impl QueryService for MockQueryConnect {
349        async fn get(
350            &self,
351            _ctx: Context,
352            request: buffa::view::OwnedView<exoware_sdk::store::query::v1::GetRequestView<'static>>,
353        ) -> connectrpc::ServiceResult<ProtoGetResponse> {
354            ensure_min_sequence_number(&self.state.sequence_number, request.min_sequence_number)?;
355            let key: Key = request.bytes().slice_ref(request.key);
356            let guard = self.state.kv.lock().expect("kv mutex poisoned");
357            let value = guard.get(&key).cloned();
358            let token = self.state.sequence_number.load(AtomicOrdering::Relaxed);
359            connectrpc::Response::ok(ProtoGetResponse {
360                value,
361                detail: Some(query_detail(token)).into(),
362                ..Default::default()
363            })
364        }
365
366        async fn range(
367            &self,
368            _ctx: Context,
369            request: buffa::view::OwnedView<
370                exoware_sdk::store::query::v1::RangeRequestView<'static>,
371            >,
372        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoRangeFrame>> {
373            ensure_min_sequence_number(&self.state.sequence_number, request.min_sequence_number)?;
374            self.state.range_calls.fetch_add(1, AtomicOrdering::SeqCst);
375
376            let wire = request.bytes();
377            let start_key: Key = wire.slice_ref(request.start);
378            let end_key: Key = wire.slice_ref(request.end);
379            let limit = request.limit.map(|v| v as usize).unwrap_or(usize::MAX);
380            let batch_size = usize::try_from(request.batch_size).unwrap_or(usize::MAX);
381            if batch_size == 0 {
382                return Err(ConnectError::invalid_argument(
383                    "invalid batch_size: expected positive integer",
384                ));
385            }
386
387            let mode = match parse_range_traversal_direction(request.mode) {
388                Ok(RangeTraversalDirection::Forward) => RangeMode::Forward,
389                Ok(RangeTraversalDirection::Reverse) => RangeMode::Reverse,
390                Err(RangeTraversalModeError::UnknownWireValue(v)) => {
391                    return Err(ConnectError::invalid_argument(format!(
392                        "unknown TraversalMode enum value {v}"
393                    )));
394                }
395            };
396
397            let state = self.state.clone();
398            let guard = state.kv.lock().expect("kv mutex poisoned");
399            // Match `StoreEngine::range_scan`: inclusive [start, end]; empty end = unbounded.
400            let range: (std::ops::Bound<&Key>, std::ops::Bound<&Key>) = (
401                Included(&start_key),
402                if end_key.is_empty() {
403                    Unbounded
404                } else {
405                    Included(&end_key)
406                },
407            );
408            let range_iter = guard.range::<Key, _>(range);
409            let iter: Box<dyn Iterator<Item = (&Key, &Bytes)> + Send> = match mode {
410                RangeMode::Forward => Box::new(range_iter),
411                RangeMode::Reverse => Box::new(range_iter.rev()),
412            };
413            let mut results: Vec<ProtoKvEntry> = Vec::new();
414            for (key, value) in iter.take(limit) {
415                results.push(ProtoKvEntry {
416                    key: key.to_vec(),
417                    value: value.clone(),
418                    ..Default::default()
419                });
420            }
421            drop(guard);
422            let token = state.sequence_number.load(AtomicOrdering::Relaxed);
423            let batch = batch_size.max(1);
424            let mut frames: Vec<Result<ProtoRangeFrame, ConnectError>> = Vec::new();
425            let mut emitted_frame = false;
426            for chunk in results.chunks(batch) {
427                frames.push(Ok(ProtoRangeFrame {
428                    results: chunk.to_vec(),
429                    detail: Some(query_detail(token)).into(),
430                    ..Default::default()
431                }));
432                emitted_frame = true;
433            }
434            if !emitted_frame {
435                frames.push(Ok(final_range_detail_frame(token)));
436            }
437            Ok(connectrpc::Response::stream(stream::iter(frames)))
438        }
439
440        async fn get_many(
441            &self,
442            _ctx: Context,
443            request: buffa::view::OwnedView<
444                exoware_sdk::store::query::v1::GetManyRequestView<'static>,
445            >,
446        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoGetManyFrame>> {
447            ensure_min_sequence_number(&self.state.sequence_number, request.min_sequence_number)?;
448            let batch_size = usize::try_from(request.batch_size)
449                .unwrap_or(usize::MAX)
450                .max(1);
451            let guard = self.state.kv.lock().expect("kv mutex poisoned");
452            let mut entries: Vec<ProtoGetManyEntry> = Vec::new();
453            let wire = request.bytes();
454            for key_bytes in request.keys.iter() {
455                let key: Key = wire.slice_ref(key_bytes);
456                let value = guard.get(&key).cloned();
457                entries.push(ProtoGetManyEntry {
458                    key: key.to_vec(),
459                    value,
460                    ..Default::default()
461                });
462            }
463            drop(guard);
464            let token = self.state.sequence_number.load(AtomicOrdering::Relaxed);
465            let mut frames: Vec<Result<ProtoGetManyFrame, ConnectError>> = Vec::new();
466            let mut emitted_frame = false;
467            for chunk in entries.chunks(batch_size) {
468                frames.push(Ok(ProtoGetManyFrame {
469                    results: chunk.to_vec(),
470                    detail: Some(query_detail(token)).into(),
471                    ..Default::default()
472                }));
473                emitted_frame = true;
474            }
475            if !emitted_frame {
476                frames.push(Ok(final_get_many_detail_frame(token)));
477            }
478            Ok(connectrpc::Response::stream(stream::iter(frames)))
479        }
480
481        async fn reduce(
482            &self,
483            _ctx: Context,
484            request: buffa::view::OwnedView<
485                exoware_sdk::store::query::v1::ReduceRequestView<'static>,
486            >,
487        ) -> connectrpc::ServiceResult<ProtoReduceResponse> {
488            ensure_min_sequence_number(&self.state.sequence_number, request.min_sequence_number)?;
489            self.state
490                .range_reduce_calls
491                .fetch_add(1, AtomicOrdering::SeqCst);
492            let owned = request.to_owned_message();
493            let start_key: Key = owned.start.clone().into();
494            let end_key: Key = owned.end.clone().into();
495            let reduce_req = owned
496                .params
497                .as_option()
498                .ok_or_else(|| ConnectError::invalid_argument("missing range reduce params"))?;
499            let domain_request =
500                to_domain_reduce_request(reduce_req).map_err(ConnectError::invalid_argument)?;
501
502            let state = self.state.clone();
503            let guard = state.kv.lock().expect("kv mutex poisoned");
504            let mut states = domain_request.group_by.is_empty().then(|| {
505                domain_request
506                    .reducers
507                    .iter()
508                    .map(|reducer| PartialAggregateState::from_op(reducer.op))
509                    .collect::<Vec<_>>()
510            });
511            let mut grouped = BTreeMap::<Vec<u8>, MockGroupedReduceState>::new();
512
513            let range: (std::ops::Bound<&Key>, std::ops::Bound<&Key>) = (
514                Included(&start_key),
515                if end_key.is_empty() {
516                    Unbounded
517                } else {
518                    Included(&end_key)
519                },
520            );
521            for (key, value) in guard.range::<Key, _>(range) {
522                let Some((group_values, reducer_values)) =
523                    extract_mock_reduce_row(key, value, &domain_request)
524                else {
525                    continue;
526                };
527                if domain_request.group_by.is_empty() {
528                    let states = states.as_mut().expect("scalar states");
529                    for ((state, reducer), value) in states
530                        .iter_mut()
531                        .zip(domain_request.reducers.iter())
532                        .zip(reducer_values)
533                    {
534                        match reducer.op {
535                            RangeReduceOp::CountAll => state
536                                .merge_partial(reducer.op, Some(&KvReducedValue::UInt64(1)))
537                                .map_err(|e| ConnectError::internal(e.to_string()))?,
538                            RangeReduceOp::CountField => {
539                                let partial =
540                                    KvReducedValue::UInt64(if value.is_some() { 1 } else { 0 });
541                                state
542                                    .merge_partial(reducer.op, Some(&partial))
543                                    .map_err(|e| ConnectError::internal(e.to_string()))?
544                            }
545                            _ => state
546                                .merge_partial(reducer.op, value.as_ref())
547                                .map_err(|e| ConnectError::internal(e.to_string()))?,
548                        }
549                    }
550                } else {
551                    let group_key = encode_reduced_group_key(&group_values);
552                    let group =
553                        grouped
554                            .entry(group_key)
555                            .or_insert_with(|| MockGroupedReduceState {
556                                group_values: group_values.clone(),
557                                states: domain_request
558                                    .reducers
559                                    .iter()
560                                    .map(|reducer| PartialAggregateState::from_op(reducer.op))
561                                    .collect(),
562                            });
563                    for ((state, reducer), value) in group
564                        .states
565                        .iter_mut()
566                        .zip(domain_request.reducers.iter())
567                        .zip(reducer_values)
568                    {
569                        match reducer.op {
570                            RangeReduceOp::CountAll => state
571                                .merge_partial(reducer.op, Some(&KvReducedValue::UInt64(1)))
572                                .map_err(|e| ConnectError::internal(e.to_string()))?,
573                            RangeReduceOp::CountField => {
574                                let partial =
575                                    KvReducedValue::UInt64(if value.is_some() { 1 } else { 0 });
576                                state
577                                    .merge_partial(reducer.op, Some(&partial))
578                                    .map_err(|e| ConnectError::internal(e.to_string()))?
579                            }
580                            _ => state
581                                .merge_partial(reducer.op, value.as_ref())
582                                .map_err(|e| ConnectError::internal(e.to_string()))?,
583                        }
584                    }
585                }
586            }
587
588            let response = if let Some(states) = states {
589                RangeReduceResponse {
590                    results: states
591                        .iter()
592                        .map(|state| RangeReduceResult {
593                            value: match state {
594                                PartialAggregateState::Count(count) => {
595                                    Some(KvReducedValue::UInt64(*count))
596                                }
597                                PartialAggregateState::Sum(value)
598                                | PartialAggregateState::Min(value)
599                                | PartialAggregateState::Max(value) => value.clone(),
600                            },
601                        })
602                        .collect(),
603                    groups: Vec::new(),
604                }
605            } else {
606                RangeReduceResponse {
607                    results: Vec::new(),
608                    groups: grouped
609                        .into_values()
610                        .map(|group| RangeReduceGroup {
611                            group_values: group.group_values,
612                            results: group
613                                .states
614                                .into_iter()
615                                .map(|state| RangeReduceResult {
616                                    value: match state {
617                                        PartialAggregateState::Count(count) => {
618                                            Some(KvReducedValue::UInt64(count))
619                                        }
620                                        PartialAggregateState::Sum(value)
621                                        | PartialAggregateState::Min(value)
622                                        | PartialAggregateState::Max(value) => value,
623                                    },
624                                })
625                                .collect(),
626                        })
627                        .collect(),
628                }
629            };
630            drop(guard);
631            let token = state.sequence_number.load(AtomicOrdering::Relaxed);
632            connectrpc::Response::ok(ProtoReduceResponse {
633                results: response
634                    .results
635                    .into_iter()
636                    .map(|result| exoware_sdk::store::query::v1::RangeReduceResult {
637                        value: result.value.map(to_proto_reduced_value).into(),
638                        ..Default::default()
639                    })
640                    .collect(),
641                groups: response
642                    .groups
643                    .into_iter()
644                    .map(|group| {
645                        let group_values_present: Vec<bool> =
646                            group.group_values.iter().map(|v| v.is_some()).collect();
647                        exoware_sdk::store::query::v1::RangeReduceGroup {
648                            group_values: group
649                                .group_values
650                                .into_iter()
651                                .map(to_proto_optional_reduced_value)
652                                .collect(),
653                            group_values_present,
654                            results: group
655                                .results
656                                .into_iter()
657                                .map(|result| exoware_sdk::store::query::v1::RangeReduceResult {
658                                    value: result.value.map(to_proto_reduced_value).into(),
659                                    ..Default::default()
660                                })
661                                .collect(),
662                            ..Default::default()
663                        }
664                    })
665                    .collect(),
666                detail: Some(query_detail(token)).into(),
667                ..Default::default()
668            })
669        }
670    }
671
672    async fn spawn_mock_server(state: MockState) -> (String, oneshot::Sender<()>) {
673        let connect = ConnectRpcService::new(Chain(
674            IngestServiceServer::new(MockIngestConnect {
675                state: state.clone(),
676            }),
677            QueryServiceServer::new(MockQueryConnect { state }),
678        ))
679        .with_compression(connect_compression_registry());
680        let app = Router::new().fallback_service(connect);
681
682        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
683            .await
684            .expect("bind mock server");
685        let addr = listener.local_addr().expect("local addr");
686        let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>();
687        tokio::spawn(async move {
688            axum::serve(listener, app)
689                .with_graceful_shutdown(async move {
690                    let _ = shutdown_rx.await;
691                })
692                .await
693                .expect("mock server should run");
694        });
695        (format!("http://{addr}"), shutdown_tx)
696    }
697
698    fn assert_count_scalar(batch: &RecordBatch, col_idx: usize, row_idx: usize, expected: u64) {
699        let scalar = ScalarValue::try_from_array(batch.column(col_idx), row_idx)
700            .expect("count scalar should decode");
701        match scalar {
702            ScalarValue::UInt64(Some(value)) => assert_eq!(value, expected),
703            ScalarValue::Int64(Some(value)) => assert_eq!(value, expected as i64),
704            other => panic!("unexpected count scalar: {other:?}"),
705        }
706    }
707
708    async fn explain_plan_rows(ctx: &SessionContext, sql: &str) -> Vec<(String, String)> {
709        let batches = ctx
710            .sql(&format!("EXPLAIN {sql}"))
711            .await
712            .expect("explain query")
713            .collect()
714            .await
715            .expect("explain collect");
716        let mut rows = Vec::new();
717        for batch in batches {
718            for row_idx in 0..batch.num_rows() {
719                let plan_type = scalar_to_string(
720                    &ScalarValue::try_from_array(batch.column(0), row_idx).expect("plan type"),
721                )
722                .expect("plan type string");
723                let plan = scalar_to_string(
724                    &ScalarValue::try_from_array(batch.column(1), row_idx).expect("plan"),
725                )
726                .expect("plan string");
727                rows.push((plan_type, plan));
728            }
729        }
730        rows
731    }
732
733    fn physical_plan_text(rows: &[(String, String)]) -> String {
734        rows.iter()
735            .filter(|(plan_type, _)| plan_type.contains("physical_plan"))
736            .map(|(_, plan)| plan.as_str())
737            .collect::<Vec<_>>()
738            .join("\n")
739    }
740
741    #[tokio::test]
742    async fn explain_reports_full_scan_like_primary_key_scan() {
743        let state = MockState {
744            kv: Arc::new(Mutex::new(BTreeMap::new())),
745            range_calls: Arc::new(AtomicUsize::new(0)),
746            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
747            sequence_number: Arc::new(AtomicU64::new(0)),
748        };
749        let (base_url, shutdown_tx) = spawn_mock_server(state).await;
750        let client = StoreClient::new(&base_url);
751
752        let schema = KvSchema::new(client)
753            .table(
754                "orders",
755                vec![
756                    TableColumnConfig::new("id", DataType::Int64, false),
757                    TableColumnConfig::new("status", DataType::Utf8, false),
758                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
759                ],
760                vec!["id".to_string()],
761                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
762                    .expect("valid")
763                    .with_cover_columns(vec!["amount_cents".to_string()])],
764            )
765            .expect("schema");
766        let ctx = SessionContext::new();
767        schema.register_all(&ctx).expect("register");
768
769        let explain =
770            physical_plan_text(&explain_plan_rows(&ctx, "SELECT id, status FROM orders").await);
771        assert!(explain.contains("KvScanExec:"));
772        assert!(explain.contains("mode=primary_key"));
773        assert!(explain.contains("predicate=<none>"));
774        assert!(explain.contains("row_recheck=false"));
775        assert!(explain.contains("full_scan_like=true"));
776        assert_explain_includes_query_stats_surface(
777            &explain,
778            QueryStatsExplainSurface::StreamedRangeDetail,
779        );
780
781        let _ = shutdown_tx.send(());
782    }
783
784    #[tokio::test]
785    async fn explain_reports_secondary_index_scan_and_row_recheck() {
786        let state = MockState {
787            kv: Arc::new(Mutex::new(BTreeMap::new())),
788            range_calls: Arc::new(AtomicUsize::new(0)),
789            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
790            sequence_number: Arc::new(AtomicU64::new(0)),
791        };
792        let (base_url, shutdown_tx) = spawn_mock_server(state).await;
793        let client = StoreClient::new(&base_url);
794
795        let schema = KvSchema::new(client)
796            .table(
797                "orders",
798                vec![
799                    TableColumnConfig::new("id", DataType::Int64, false),
800                    TableColumnConfig::new("status", DataType::Utf8, false),
801                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
802                ],
803                vec!["id".to_string()],
804                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
805                    .expect("valid")
806                    .with_cover_columns(vec!["amount_cents".to_string()])],
807            )
808            .expect("schema");
809        let ctx = SessionContext::new();
810        schema.register_all(&ctx).expect("register");
811
812        let explain = physical_plan_text(
813            &explain_plan_rows(
814                &ctx,
815                "SELECT id, status, amount_cents FROM orders \
816                 WHERE status = 'open' AND amount_cents >= 5",
817            )
818            .await,
819        );
820        assert!(explain.contains("KvScanExec:"));
821        assert!(explain.contains("mode=secondary_index(status_idx, lexicographic)"));
822        assert!(explain.contains("predicate=status = 'open' AND amount_cents >= 5"));
823        assert!(explain.contains("exact=false"));
824        assert!(explain.contains("row_recheck=true"));
825        assert!(explain.contains("full_scan_like=false"));
826        assert_explain_includes_query_stats_surface(
827            &explain,
828            QueryStatsExplainSurface::StreamedRangeDetail,
829        );
830
831        let _ = shutdown_tx.send(());
832    }
833
834    #[tokio::test]
835    async fn explain_reports_zorder_secondary_index_scan() {
836        let state = MockState {
837            kv: Arc::new(Mutex::new(BTreeMap::new())),
838            range_calls: Arc::new(AtomicUsize::new(0)),
839            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
840            sequence_number: Arc::new(AtomicU64::new(0)),
841        };
842        let (base_url, shutdown_tx) = spawn_mock_server(state).await;
843        let client = StoreClient::new(&base_url);
844
845        let schema = KvSchema::new(client)
846            .table(
847                "points",
848                vec![
849                    TableColumnConfig::new("x", DataType::Int64, false),
850                    TableColumnConfig::new("y", DataType::Int64, false),
851                    TableColumnConfig::new("id", DataType::Int64, false),
852                    TableColumnConfig::new("value", DataType::Int64, false),
853                ],
854                vec!["id".to_string()],
855                vec![
856                    IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()])
857                        .expect("valid")
858                        .with_cover_columns(vec!["value".to_string()]),
859                ],
860            )
861            .expect("schema");
862        let ctx = SessionContext::new();
863        schema.register_all(&ctx).expect("register");
864
865        let explain = physical_plan_text(
866            &explain_plan_rows(
867                &ctx,
868                "SELECT id, value FROM points \
869                 WHERE x >= 1 AND x <= 2 AND y >= 1 AND y <= 2",
870            )
871            .await,
872        );
873        assert!(explain.contains("KvScanExec:"));
874        assert!(explain.contains("mode=secondary_index(xy_z, z_order)"));
875        assert!(explain.contains("exact=false"));
876        assert!(explain.contains("row_recheck=true"));
877
878        let _ = shutdown_tx.send(());
879    }
880
881    #[tokio::test]
882    async fn explain_reports_aggregate_pushdown_access_path_details() {
883        let state = MockState {
884            kv: Arc::new(Mutex::new(BTreeMap::new())),
885            range_calls: Arc::new(AtomicUsize::new(0)),
886            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
887            sequence_number: Arc::new(AtomicU64::new(0)),
888        };
889        let (base_url, shutdown_tx) = spawn_mock_server(state).await;
890        let client = StoreClient::new(&base_url);
891
892        let schema = KvSchema::new(client)
893            .table(
894                "orders",
895                vec![
896                    TableColumnConfig::new("id", DataType::Int64, false),
897                    TableColumnConfig::new("status", DataType::Utf8, false),
898                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
899                ],
900                vec!["id".to_string()],
901                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
902                    .expect("valid")
903                    .with_cover_columns(vec!["amount_cents".to_string()])],
904            )
905            .expect("schema");
906        let ctx = SessionContext::new();
907        schema.register_all(&ctx).expect("register");
908
909        let explain = physical_plan_text(
910            &explain_plan_rows(
911                &ctx,
912                "SELECT status, SUM(amount_cents) AS total_cents \
913                 FROM orders WHERE status = 'open' GROUP BY status",
914            )
915            .await,
916        );
917        assert!(explain.contains("KvAggregateExec:"));
918        assert!(explain.contains("grouped=true"));
919        assert!(explain.contains("job0{mode=secondary_index(status_idx, lexicographic)"));
920        assert!(explain.contains("predicate=status = 'open'"));
921        assert!(explain.contains("exact=true"));
922        assert!(explain.contains("row_recheck=false"));
923        assert_explain_includes_query_stats_surface(
924            &explain,
925            QueryStatsExplainSurface::RangeReduceDetail,
926        );
927
928        let _ = shutdown_tx.send(());
929    }
930
931    #[test]
932    fn index_spec_constructor_sets_name_and_keys() {
933        let spec = IndexSpec::new(
934            "status_customer",
935            vec!["status".to_string(), "customer_id".to_string()],
936        )
937        .expect("valid index spec");
938        assert_eq!(spec.name(), "status_customer");
939        assert_eq!(spec.key_columns(), &["status", "customer_id"]);
940        assert!(spec.cover_columns().is_empty());
941    }
942
943    #[test]
944    fn index_spec_cover_columns_are_configurable_in_code() {
945        let spec = IndexSpec::new("status_customer", vec!["status".to_string()])
946            .expect("valid")
947            .with_cover_columns(vec!["amount_cents".to_string()]);
948        assert_eq!(spec.key_columns(), &["status"]);
949        assert_eq!(spec.cover_columns(), &["amount_cents"]);
950    }
951
952    #[test]
953    fn describe_in_list_places_truncation_ellipsis_inside_parentheses() {
954        let rendered = describe_in_list((1..=6).map(|v| v.to_string()));
955        assert_eq!(rendered, "IN (1, 2, 3, 4, 5, ...)");
956    }
957
958    #[test]
959    fn normalize_sum_case_then_one_uses_countall_optimization() {
960        let (model, _) = test_model();
961        let argument = normalize_case_then_expr(
962            AggregatePushdownFunction::Sum,
963            &Expr::Literal(ScalarValue::Int64(Some(1)), None),
964            &model,
965        )
966        .expect("normalize");
967        assert_eq!(argument, AggregatePushdownArgument::CountAll);
968    }
969
970    #[test]
971    fn normalize_count_case_then_literal_uses_countall_optimization() {
972        use datafusion::logical_expr::col;
973
974        let (model, _) = test_model();
975        let case_expr = Expr::Case(datafusion::logical_expr::expr::Case {
976            expr: None,
977            when_then_expr: vec![(
978                Box::new(col("status").eq(Expr::Literal(
979                    ScalarValue::Utf8(Some("open".to_string())),
980                    None,
981                ))),
982                Box::new(Expr::Literal(
983                    ScalarValue::Utf8(Some("yes".to_string())),
984                    None,
985                )),
986            )],
987            else_expr: Some(Box::new(Expr::Literal(ScalarValue::Utf8(None), None))),
988        });
989
990        let (func, argument, filter) =
991            normalize_count_aggregate_argument(&case_expr, &model).expect("normalize");
992        assert_eq!(func, AggregatePushdownFunction::Count);
993        assert_eq!(argument, AggregatePushdownArgument::CountAll);
994        assert!(filter.is_some());
995    }
996
997    #[test]
998    fn reduced_value_to_scalar_preserves_timestamp_timezone_label() {
999        let tz: Arc<str> = Arc::from("America/New_York");
1000        let scalar = reduced_value_to_scalar(
1001            Some(KvReducedValue::Timestamp(1_700_000_000_000_000)),
1002            &DataType::Timestamp(TimeUnit::Microsecond, Some(tz.clone())),
1003        )
1004        .expect("timestamp scalar");
1005        assert_eq!(
1006            scalar,
1007            ScalarValue::TimestampMicrosecond(Some(1_700_000_000_000_000), Some(tz))
1008        );
1009    }
1010
1011    #[test]
1012    fn index_spec_cover_pk_column_is_rejected() {
1013        let config = KvTableConfig::new(
1014            0,
1015            vec![
1016                TableColumnConfig::new("id", DataType::Int64, false),
1017                TableColumnConfig::new("status", DataType::Utf8, false),
1018            ],
1019            vec!["id".to_string()],
1020            vec![IndexSpec::new("status_idx", vec!["status".to_string()])
1021                .expect("valid")
1022                .with_cover_columns(vec!["id".to_string()])],
1023        )
1024        .expect("valid config");
1025        let model = TableModel::from_config(&config).expect("model");
1026        let err = model
1027            .resolve_index_specs(&config.index_specs)
1028            .expect_err("covering a PK column must be rejected");
1029        assert!(err.contains("primary key column"));
1030    }
1031
1032    #[test]
1033    fn access_plan_requires_cover_columns_for_index_scan() {
1034        let (model, _) = test_model();
1035        let predicate = QueryPredicate::default();
1036        let projection = Some(vec![
1037            *model.columns_by_name.get("order_id").unwrap(),
1038            *model.columns_by_name.get("amount_cents").unwrap(),
1039        ]);
1040        let plan = ScanAccessPlan::new(&model, &projection, &predicate);
1041
1042        let no_cover = IndexSpec::new("status_idx", vec!["status".to_string()]).unwrap();
1043        let with_cover = IndexSpec::new("status_idx", vec!["status".to_string()])
1044            .unwrap()
1045            .with_cover_columns(vec!["amount_cents".to_string()]);
1046        let no_cover_resolved = model.resolve_index_specs(&[no_cover]).unwrap();
1047        let with_cover_resolved = model.resolve_index_specs(&[with_cover]).unwrap();
1048
1049        assert!(!plan.index_covers_required_non_pk(&no_cover_resolved[0]));
1050        assert!(plan.index_covers_required_non_pk(&with_cover_resolved[0]));
1051    }
1052
1053    #[test]
1054    fn choose_index_plan_prefers_longer_prefix() {
1055        let (model, specs) = test_model();
1056        let region_idx = *model.columns_by_name.get("region").unwrap();
1057        let customer_idx = *model.columns_by_name.get("customer_id").unwrap();
1058        let mut predicate = QueryPredicate::default();
1059        predicate.constraints.insert(
1060            region_idx,
1061            PredicateConstraint::StringEq("us-east".to_string()),
1062        );
1063        predicate.constraints.insert(
1064            customer_idx,
1065            PredicateConstraint::IntRange {
1066                min: Some(10),
1067                max: Some(20),
1068            },
1069        );
1070        let plan = predicate
1071            .choose_index_plan(&model, &specs)
1072            .expect("plan")
1073            .expect("exists");
1074        assert_eq!(plan.spec_idx, 0);
1075        assert_eq!(plan.constrained_prefix_len, 2);
1076    }
1077
1078    #[test]
1079    fn choose_index_plan_prefers_covering_index_when_prefix_strength_ties() {
1080        let config = KvTableConfig::new(
1081            0,
1082            vec![
1083                TableColumnConfig::new("id", DataType::Int64, false),
1084                TableColumnConfig::new("status", DataType::Utf8, false),
1085                TableColumnConfig::new("amount_cents", DataType::Int64, false),
1086            ],
1087            vec!["id".to_string()],
1088            vec![
1089                IndexSpec::new("status_plain", vec!["status".to_string()]).expect("valid"),
1090                IndexSpec::new("status_covering", vec!["status".to_string()])
1091                    .expect("valid")
1092                    .with_cover_columns(vec!["amount_cents".to_string()]),
1093            ],
1094        )
1095        .expect("config");
1096        let model = TableModel::from_config(&config).expect("model");
1097        let specs = model
1098            .resolve_index_specs(&config.index_specs)
1099            .expect("specs");
1100        let status_idx = *model.columns_by_name.get("status").unwrap();
1101        let amount_idx = *model.columns_by_name.get("amount_cents").unwrap();
1102        let mut predicate = QueryPredicate::default();
1103        predicate.constraints.insert(
1104            status_idx,
1105            PredicateConstraint::StringEq("open".to_string()),
1106        );
1107        predicate.constraints.insert(
1108            amount_idx,
1109            PredicateConstraint::IntRange {
1110                min: Some(10),
1111                max: None,
1112            },
1113        );
1114
1115        let plan = predicate
1116            .choose_index_plan(&model, &specs)
1117            .expect("plan")
1118            .expect("exists");
1119        assert_eq!(specs[plan.spec_idx].name, "status_covering");
1120    }
1121
1122    #[test]
1123    fn choose_index_plan_prefers_zorder_for_multi_column_box_constraints() {
1124        let (model, specs) = zorder_test_model();
1125        let x_idx = *model.columns_by_name.get("x").unwrap();
1126        let y_idx = *model.columns_by_name.get("y").unwrap();
1127        let mut predicate = QueryPredicate::default();
1128        predicate.constraints.insert(
1129            x_idx,
1130            PredicateConstraint::IntRange {
1131                min: Some(1),
1132                max: Some(2),
1133            },
1134        );
1135        predicate.constraints.insert(
1136            y_idx,
1137            PredicateConstraint::IntRange {
1138                min: Some(1),
1139                max: Some(2),
1140            },
1141        );
1142
1143        let plan = predicate
1144            .choose_index_plan(&model, &specs)
1145            .expect("plan")
1146            .expect("exists");
1147        assert_eq!(specs[plan.spec_idx].name, "xy_z");
1148        assert_eq!(specs[plan.spec_idx].layout, IndexLayout::ZOrder);
1149        assert_eq!(plan.constrained_column_count, 2);
1150    }
1151
1152    #[test]
1153    fn secondary_index_key_round_trip() {
1154        let (model, specs) = test_model();
1155        let row = KvRow {
1156            values: vec![
1157                CellValue::Utf8("us-east".to_string()),
1158                CellValue::Int64(42),
1159                CellValue::Int64(9001),
1160                CellValue::Int64(1500),
1161                CellValue::Utf8("open".to_string()),
1162            ],
1163        };
1164        let key = encode_secondary_index_key(model.table_prefix, &specs[0], &model, &row)
1165            .expect("encode");
1166        let decoded = decode_secondary_index_key(model.table_prefix, &specs[0], &model, &key)
1167            .expect("decode");
1168        let region_idx = *model.columns_by_name.get("region").unwrap();
1169        let customer_idx = *model.columns_by_name.get("customer_id").unwrap();
1170        assert!(matches!(
1171            decoded.values.get(&region_idx),
1172            Some(CellValue::Utf8(v)) if v == "us-east"
1173        ));
1174        assert!(matches!(
1175            decoded.values.get(&customer_idx),
1176            Some(CellValue::Int64(v)) if *v == 42
1177        ));
1178        assert!(matches!(
1179            &decoded.primary_key_values[0],
1180            CellValue::Int64(9001)
1181        ));
1182        let expected_pk = encode_primary_key_from_row(model.table_prefix, &row, &model)
1183            .expect("primary key should encode");
1184        assert_eq!(decoded.primary_key, expected_pk);
1185    }
1186
1187    #[test]
1188    fn zorder_secondary_index_key_round_trip() {
1189        let (model, specs) = zorder_test_model();
1190        let row = KvRow {
1191            values: vec![
1192                CellValue::Int64(2),
1193                CellValue::Int64(1),
1194                CellValue::Int64(42),
1195                CellValue::Int64(900),
1196            ],
1197        };
1198        let key = encode_secondary_index_key(model.table_prefix, &specs[1], &model, &row)
1199            .expect("encode");
1200        let decoded = decode_secondary_index_key(model.table_prefix, &specs[1], &model, &key)
1201            .expect("decode");
1202        let x_idx = *model.columns_by_name.get("x").unwrap();
1203        let y_idx = *model.columns_by_name.get("y").unwrap();
1204        assert!(matches!(
1205            decoded.values.get(&x_idx),
1206            Some(CellValue::Int64(v)) if *v == 2
1207        ));
1208        assert!(matches!(
1209            decoded.values.get(&y_idx),
1210            Some(CellValue::Int64(v)) if *v == 1
1211        ));
1212        assert!(matches!(
1213            &decoded.primary_key_values[0],
1214            CellValue::Int64(42)
1215        ));
1216    }
1217
1218    #[test]
1219    fn table_config_supports_non_orders_schema() {
1220        let config = KvTableConfig::new(
1221            0,
1222            vec![
1223                TableColumnConfig::new("tenant", DataType::Utf8, false),
1224                TableColumnConfig::new("id", DataType::Int64, false),
1225                TableColumnConfig::new("score", DataType::Int64, false),
1226            ],
1227            vec!["id".to_string()],
1228            vec![IndexSpec::new(
1229                "tenant_score",
1230                vec!["tenant".to_string(), "score".to_string()],
1231            )
1232            .expect("valid")],
1233        )
1234        .expect("schema agnostic config should be valid");
1235        assert_eq!(config.primary_key_columns, vec!["id".to_string()]);
1236        assert_eq!(config.columns.len(), 3);
1237    }
1238
1239    #[test]
1240    fn table_config_accepts_float64_column() {
1241        let config = KvTableConfig::new(
1242            0,
1243            vec![
1244                TableColumnConfig::new("id", DataType::Int64, false),
1245                TableColumnConfig::new("price", DataType::Float64, false),
1246            ],
1247            vec!["id".to_string()],
1248            vec![],
1249        )
1250        .expect("Float64 column should be accepted");
1251        assert_eq!(config.columns.len(), 2);
1252    }
1253
1254    #[test]
1255    fn table_config_accepts_boolean_column() {
1256        let config = KvTableConfig::new(
1257            0,
1258            vec![
1259                TableColumnConfig::new("id", DataType::Int64, false),
1260                TableColumnConfig::new("active", DataType::Boolean, false),
1261            ],
1262            vec!["id".to_string()],
1263            vec![],
1264        )
1265        .expect("Boolean column should be accepted");
1266        assert_eq!(config.columns.len(), 2);
1267    }
1268
1269    #[test]
1270    fn build_projected_batch_uses_large_utf8_type() {
1271        let config = KvTableConfig::new(
1272            0,
1273            vec![
1274                TableColumnConfig::new("id", DataType::Int64, false),
1275                TableColumnConfig::new("name", DataType::LargeUtf8, false),
1276            ],
1277            vec!["id".to_string()],
1278            vec![],
1279        )
1280        .unwrap();
1281        let model = TableModel::from_config(&config).unwrap();
1282        let rows = vec![KvRow {
1283            values: vec![CellValue::Int64(1), CellValue::Utf8("hello".to_string())],
1284        }];
1285        let batch = build_projected_batch(&rows, &model, &model.schema, &None).unwrap();
1286        assert_eq!(batch.column(1).data_type(), &DataType::LargeUtf8);
1287        let values = batch
1288            .column(1)
1289            .as_any()
1290            .downcast_ref::<LargeStringArray>()
1291            .expect("must build LargeStringArray");
1292        assert_eq!(values.value(0), "hello");
1293    }
1294
1295    #[test]
1296    fn build_projected_batch_uses_utf8_view_type() {
1297        let config = KvTableConfig::new(
1298            0,
1299            vec![
1300                TableColumnConfig::new("id", DataType::Int64, false),
1301                TableColumnConfig::new("name", DataType::Utf8View, false),
1302            ],
1303            vec!["id".to_string()],
1304            vec![],
1305        )
1306        .unwrap();
1307        let model = TableModel::from_config(&config).unwrap();
1308        let rows = vec![KvRow {
1309            values: vec![CellValue::Int64(1), CellValue::Utf8("hello".to_string())],
1310        }];
1311        let batch = build_projected_batch(&rows, &model, &model.schema, &None).unwrap();
1312        assert_eq!(batch.column(1).data_type(), &DataType::Utf8View);
1313        let values = batch
1314            .column(1)
1315            .as_any()
1316            .downcast_ref::<StringViewArray>()
1317            .expect("must build StringViewArray");
1318        assert_eq!(values.value(0), "hello");
1319    }
1320
1321    #[test]
1322    fn f64_ordered_encoding_preserves_order() {
1323        let values = [
1324            f64::NEG_INFINITY,
1325            f64::MIN,
1326            -1000.0,
1327            -1.0,
1328            -0.001,
1329            0.0,
1330            0.001,
1331            1.0,
1332            1000.0,
1333            f64::MAX,
1334            f64::INFINITY,
1335        ];
1336        let encoded: Vec<[u8; 8]> = values.iter().map(|v| encode_f64_ordered(*v)).collect();
1337        for i in 0..encoded.len() - 1 {
1338            assert!(
1339                encoded[i] < encoded[i + 1],
1340                "encode_f64_ordered({}) >= encode_f64_ordered({})",
1341                values[i],
1342                values[i + 1]
1343            );
1344        }
1345    }
1346
1347    #[test]
1348    fn f64_ordered_encoding_round_trip() {
1349        let values = [
1350            f64::MIN,
1351            -42.5,
1352            -0.0,
1353            0.0,
1354            3.125,
1355            f64::MAX,
1356            f64::INFINITY,
1357            f64::NEG_INFINITY,
1358        ];
1359        for v in values {
1360            let encoded = encode_f64_ordered(v);
1361            let decoded = decode_f64_ordered(encoded);
1362            assert!(
1363                v.to_bits() == decoded.to_bits(),
1364                "round-trip failed for {v}: got {decoded}"
1365            );
1366        }
1367    }
1368
1369    fn mixed_model() -> (TableModel, Vec<ResolvedIndexSpec>) {
1370        let config = KvTableConfig::new(
1371            0,
1372            vec![
1373                TableColumnConfig::new("id", DataType::Int64, false),
1374                TableColumnConfig::new("label", DataType::Utf8, false),
1375                TableColumnConfig::new("score", DataType::Float64, false),
1376                TableColumnConfig::new("active", DataType::Boolean, false),
1377            ],
1378            vec!["id".to_string()],
1379            vec![
1380                IndexSpec::new(
1381                    "active_score",
1382                    vec!["active".to_string(), "score".to_string()],
1383                )
1384                .expect("valid"),
1385                IndexSpec::new("label_idx", vec!["label".to_string()]).expect("valid"),
1386            ],
1387        )
1388        .expect("valid config");
1389        let model = TableModel::from_config(&config).expect("model");
1390        let specs = model
1391            .resolve_index_specs(&config.index_specs)
1392            .expect("specs");
1393        (model, specs)
1394    }
1395
1396    #[test]
1397    fn secondary_index_key_round_trip_with_float64_and_boolean() {
1398        let (model, specs) = mixed_model();
1399        let row = KvRow {
1400            values: vec![
1401                CellValue::Int64(100),
1402                CellValue::Utf8("hello".to_string()),
1403                CellValue::Float64(3.125),
1404                CellValue::Boolean(true),
1405            ],
1406        };
1407        let key = encode_secondary_index_key(model.table_prefix, &specs[0], &model, &row)
1408            .expect("encode");
1409        let decoded = decode_secondary_index_key(model.table_prefix, &specs[0], &model, &key)
1410            .expect("decode");
1411        let active_idx = *model.columns_by_name.get("active").unwrap();
1412        let score_idx = *model.columns_by_name.get("score").unwrap();
1413        assert!(matches!(
1414            decoded.values.get(&active_idx),
1415            Some(CellValue::Boolean(true))
1416        ));
1417        assert!(
1418            matches!(decoded.values.get(&score_idx), Some(CellValue::Float64(v)) if (*v - 3.125).abs() < f64::EPSILON)
1419        );
1420        assert!(matches!(
1421            &decoded.primary_key_values[0],
1422            CellValue::Int64(100)
1423        ));
1424    }
1425
1426    #[test]
1427    fn base_row_round_trip_with_float64_and_boolean() {
1428        let (model, _specs) = mixed_model();
1429        let row = KvRow {
1430            values: vec![
1431                CellValue::Int64(42),
1432                CellValue::Utf8("world".to_string()),
1433                CellValue::Float64(-99.5),
1434                CellValue::Boolean(false),
1435            ],
1436        };
1437        let encoded = encode_base_row_value(&row, &model).expect("encode");
1438        let decoded =
1439            decode_base_row(vec![CellValue::Int64(42)], &encoded, &model).expect("decode");
1440        assert!(matches!(&decoded.values[0], CellValue::Int64(42)));
1441        assert!(matches!(&decoded.values[1], CellValue::Utf8(v) if v == "world"));
1442        assert!(
1443            matches!(&decoded.values[2], CellValue::Float64(v) if (*v - (-99.5)).abs() < f64::EPSILON)
1444        );
1445        assert!(matches!(&decoded.values[3], CellValue::Boolean(false)));
1446    }
1447
1448    #[test]
1449    fn predicate_bool_eq_matches() {
1450        let (model, _specs) = mixed_model();
1451        let active_idx = *model.columns_by_name.get("active").unwrap();
1452        let mut pred = QueryPredicate::default();
1453        pred.constraints
1454            .insert(active_idx, PredicateConstraint::BoolEq(true));
1455        let row_true = KvRow {
1456            values: vec![
1457                CellValue::Int64(1),
1458                CellValue::Utf8("a".to_string()),
1459                CellValue::Float64(1.0),
1460                CellValue::Boolean(true),
1461            ],
1462        };
1463        let row_false = KvRow {
1464            values: vec![
1465                CellValue::Int64(2),
1466                CellValue::Utf8("b".to_string()),
1467                CellValue::Float64(2.0),
1468                CellValue::Boolean(false),
1469            ],
1470        };
1471        assert!(pred.matches_row(&row_true));
1472        assert!(!pred.matches_row(&row_false));
1473    }
1474
1475    #[test]
1476    fn predicate_float_range_matches() {
1477        let (model, _specs) = mixed_model();
1478        let score_idx = *model.columns_by_name.get("score").unwrap();
1479        let mut pred = QueryPredicate::default();
1480        pred.constraints.insert(
1481            score_idx,
1482            PredicateConstraint::FloatRange {
1483                min: Some((2.0, true)),
1484                max: Some((5.0, false)),
1485            },
1486        );
1487        let make_row = |score: f64| KvRow {
1488            values: vec![
1489                CellValue::Int64(1),
1490                CellValue::Utf8("a".to_string()),
1491                CellValue::Float64(score),
1492                CellValue::Boolean(true),
1493            ],
1494        };
1495        assert!(!pred.matches_row(&make_row(1.99)));
1496        assert!(pred.matches_row(&make_row(2.0)));
1497        assert!(pred.matches_row(&make_row(3.5)));
1498        assert!(pred.matches_row(&make_row(4.99)));
1499        assert!(!pred.matches_row(&make_row(5.0)));
1500        assert!(!pred.matches_row(&make_row(5.01)));
1501    }
1502
1503    #[test]
1504    fn float_range_rejects_nan_row_value() {
1505        let constraint = PredicateConstraint::FloatRange {
1506            min: Some((0.0, true)),
1507            max: Some((10.0, true)),
1508        };
1509        assert!(!matches_constraint(
1510            &CellValue::Float64(f64::NAN),
1511            &constraint
1512        ));
1513    }
1514
1515    #[test]
1516    fn index_plan_with_boolean_prefix() {
1517        let (model, specs) = mixed_model();
1518        let active_idx = *model.columns_by_name.get("active").unwrap();
1519        let mut pred = QueryPredicate::default();
1520        pred.constraints
1521            .insert(active_idx, PredicateConstraint::BoolEq(true));
1522        let plan = pred
1523            .choose_index_plan(&model, &specs)
1524            .expect("plan")
1525            .expect("should find index");
1526        assert_eq!(plan.spec_idx, 0);
1527        assert_eq!(plan.constrained_prefix_len, 1);
1528    }
1529
1530    #[test]
1531    fn float_constraint_contradiction() {
1532        let mut lo: Option<(f64, bool)> = None;
1533        let mut hi: Option<(f64, bool)> = None;
1534        let mut contradiction = false;
1535        apply_float_constraint(&mut lo, &mut hi, Operator::Gt, 10.0, &mut contradiction);
1536        assert!(!contradiction);
1537        apply_float_constraint(&mut lo, &mut hi, Operator::Lt, 5.0, &mut contradiction);
1538        assert!(contradiction);
1539    }
1540
1541    #[test]
1542    fn float_constraint_eq_then_range_contradicts() {
1543        let mut lo: Option<(f64, bool)> = None;
1544        let mut hi: Option<(f64, bool)> = None;
1545        let mut contradiction = false;
1546        apply_float_constraint(&mut lo, &mut hi, Operator::Eq, 5.0, &mut contradiction);
1547        assert!(!contradiction);
1548        apply_float_constraint(&mut lo, &mut hi, Operator::Gt, 5.0, &mut contradiction);
1549        assert!(contradiction);
1550    }
1551
1552    #[test]
1553    fn float_nan_literal_comparison_marks_contradiction() {
1554        let config = KvTableConfig::new(
1555            0,
1556            vec![
1557                TableColumnConfig::new("id", DataType::Int64, false),
1558                TableColumnConfig::new("score", DataType::Float64, false),
1559            ],
1560            vec!["id".to_string()],
1561            vec![],
1562        )
1563        .unwrap();
1564        let model = TableModel::from_config(&config).unwrap();
1565
1566        use datafusion::logical_expr::col;
1567        let filter = col("score").gt(Expr::Literal(ScalarValue::Float64(Some(f64::NAN)), None));
1568        assert!(QueryPredicate::supports_filter(&filter, &model));
1569
1570        let pred = QueryPredicate::from_filters(&[filter], &model);
1571        assert!(
1572            pred.contradiction,
1573            "comparison with NaN literal must produce contradiction"
1574        );
1575    }
1576
1577    #[test]
1578    fn null_predicate_merges_are_order_independent() {
1579        use datafusion::logical_expr::col;
1580        let config = KvTableConfig::new(
1581            0,
1582            vec![
1583                TableColumnConfig::new("id", DataType::Int64, false),
1584                TableColumnConfig::new("label", DataType::Utf8, true),
1585            ],
1586            vec!["id".to_string()],
1587            vec![],
1588        )
1589        .unwrap();
1590        let model = TableModel::from_config(&config).unwrap();
1591        let label_idx = *model.columns_by_name.get("label").unwrap();
1592        let eq_foo = col("label").eq(Expr::Literal(
1593            ScalarValue::Utf8(Some("foo".to_string())),
1594            None,
1595        ));
1596        let is_null = col("label").is_null();
1597        let is_not_null = col("label").is_not_null();
1598        let row_foo = KvRow {
1599            values: vec![CellValue::Int64(1), CellValue::Utf8("foo".to_string())],
1600        };
1601
1602        // (label = 'foo') AND (label IS NOT NULL) — IS NOT NULL is implied;
1603        // predicate must reduce to StringEq('foo') in either order.
1604        for filters in [[&eq_foo, &is_not_null], [&is_not_null, &eq_foo]] {
1605            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1606            let pred = QueryPredicate::from_filters(&owned, &model);
1607            assert!(!pred.contradiction);
1608            assert!(matches!(
1609                pred.constraints.get(&label_idx),
1610                Some(PredicateConstraint::StringEq(s)) if s == "foo"
1611            ));
1612            assert!(pred.matches_row(&row_foo));
1613        }
1614
1615        // (label = 'foo') AND (label IS NULL) — must contradict in either order.
1616        for filters in [[&eq_foo, &is_null], [&is_null, &eq_foo]] {
1617            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1618            assert!(QueryPredicate::from_filters(&owned, &model).contradiction);
1619        }
1620    }
1621
1622    #[test]
1623    fn non_nullable_null_predicates_simplify() {
1624        use datafusion::logical_expr::col;
1625        let config = KvTableConfig::new(
1626            0,
1627            vec![
1628                TableColumnConfig::new("id", DataType::Int64, false),
1629                TableColumnConfig::new("label", DataType::Utf8, true),
1630            ],
1631            vec!["id".to_string()],
1632            vec![],
1633        )
1634        .unwrap();
1635        let model = TableModel::from_config(&config).unwrap();
1636
1637        let is_null = col("id").is_null();
1638        assert!(QueryPredicate::supports_filter(&is_null, &model));
1639        assert!(QueryPredicate::from_filters(&[is_null], &model).contradiction);
1640
1641        let is_not_null = col("id").is_not_null();
1642        assert!(QueryPredicate::supports_filter(&is_not_null, &model));
1643        let pred = QueryPredicate::from_filters(&[is_not_null], &model);
1644        assert!(!pred.contradiction);
1645        assert!(pred.constraints.is_empty());
1646    }
1647
1648    #[test]
1649    fn null_literal_comparisons_are_contradictions() {
1650        use datafusion::logical_expr::col;
1651        let config = KvTableConfig::new(
1652            0,
1653            vec![
1654                TableColumnConfig::new("id", DataType::Int64, false),
1655                TableColumnConfig::new("label", DataType::Utf8, true),
1656            ],
1657            vec!["id".to_string()],
1658            vec![],
1659        )
1660        .unwrap();
1661        let model = TableModel::from_config(&config).unwrap();
1662
1663        for filter in [
1664            col("label").eq(Expr::Literal(ScalarValue::Utf8(None), None)),
1665            col("id").gt(Expr::Literal(ScalarValue::Int64(None), None)),
1666        ] {
1667            assert!(QueryPredicate::supports_filter(&filter, &model));
1668            assert!(QueryPredicate::from_filters(&[filter], &model).contradiction);
1669        }
1670    }
1671
1672    #[test]
1673    fn in_predicate_merges_with_comparisons_order_independent() {
1674        use datafusion::logical_expr::{col, in_list};
1675        let config = KvTableConfig::new(
1676            0,
1677            vec![
1678                TableColumnConfig::new("id", DataType::Int64, false),
1679                TableColumnConfig::new("label", DataType::Utf8, true),
1680                TableColumnConfig::new("score", DataType::Int64, true),
1681                TableColumnConfig::new("version", DataType::UInt64, true),
1682                TableColumnConfig::new("hash", DataType::FixedSizeBinary(2), true),
1683            ],
1684            vec!["id".to_string()],
1685            vec![],
1686        )
1687        .unwrap();
1688        let model = TableModel::from_config(&config).unwrap();
1689        let label_idx = *model.columns_by_name.get("label").unwrap();
1690        let score_idx = *model.columns_by_name.get("score").unwrap();
1691        let version_idx = *model.columns_by_name.get("version").unwrap();
1692        let hash_idx = *model.columns_by_name.get("hash").unwrap();
1693
1694        let label_in = in_list(
1695            col("label"),
1696            vec![
1697                Expr::Literal(ScalarValue::Utf8(Some("foo".to_string())), None),
1698                Expr::Literal(ScalarValue::Utf8(Some("bar".to_string())), None),
1699            ],
1700            false,
1701        );
1702        let label_eq = col("label").eq(Expr::Literal(
1703            ScalarValue::Utf8(Some("foo".to_string())),
1704            None,
1705        ));
1706        for filters in [[&label_in, &label_eq], [&label_eq, &label_in]] {
1707            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1708            let pred = QueryPredicate::from_filters(&owned, &model);
1709            assert!(!pred.contradiction);
1710            assert!(matches!(
1711                pred.constraints.get(&label_idx),
1712                Some(PredicateConstraint::StringEq(v)) if v == "foo"
1713            ));
1714        }
1715
1716        let label_miss = col("label").eq(Expr::Literal(
1717            ScalarValue::Utf8(Some("baz".to_string())),
1718            None,
1719        ));
1720        for filters in [[&label_in, &label_miss], [&label_miss, &label_in]] {
1721            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1722            assert!(QueryPredicate::from_filters(&owned, &model).contradiction);
1723        }
1724
1725        let score_in = in_list(
1726            col("score"),
1727            vec![
1728                Expr::Literal(ScalarValue::Int64(Some(1)), None),
1729                Expr::Literal(ScalarValue::Int64(Some(2)), None),
1730                Expr::Literal(ScalarValue::Int64(Some(3)), None),
1731            ],
1732            false,
1733        );
1734        let score_gt = col("score").gt(Expr::Literal(ScalarValue::Int64(Some(1)), None));
1735        for filters in [[&score_in, &score_gt], [&score_gt, &score_in]] {
1736            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1737            let pred = QueryPredicate::from_filters(&owned, &model);
1738            assert!(!pred.contradiction);
1739            assert!(matches!(
1740                pred.constraints.get(&score_idx),
1741                Some(PredicateConstraint::IntIn(v)) if v == &vec![2, 3]
1742            ));
1743        }
1744
1745        let version_in = in_list(
1746            col("version"),
1747            vec![
1748                Expr::Literal(ScalarValue::UInt64(Some(1)), None),
1749                Expr::Literal(ScalarValue::UInt64(Some(2)), None),
1750                Expr::Literal(ScalarValue::UInt64(Some(3)), None),
1751            ],
1752            false,
1753        );
1754        let version_lt = col("version").lt(Expr::Literal(ScalarValue::UInt64(Some(3)), None));
1755        for filters in [[&version_in, &version_lt], [&version_lt, &version_in]] {
1756            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1757            let pred = QueryPredicate::from_filters(&owned, &model);
1758            assert!(!pred.contradiction);
1759            assert!(matches!(
1760                pred.constraints.get(&version_idx),
1761                Some(PredicateConstraint::UInt64In(v)) if v == &vec![1, 2]
1762            ));
1763        }
1764
1765        let hash_in = in_list(
1766            col("hash"),
1767            vec![
1768                Expr::Literal(
1769                    ScalarValue::FixedSizeBinary(2, Some(vec![0xAA, 0xAA])),
1770                    None,
1771                ),
1772                Expr::Literal(
1773                    ScalarValue::FixedSizeBinary(2, Some(vec![0xBB, 0xBB])),
1774                    None,
1775                ),
1776            ],
1777            false,
1778        );
1779        let hash_eq = col("hash").eq(Expr::Literal(
1780            ScalarValue::FixedSizeBinary(2, Some(vec![0xAA, 0xAA])),
1781            None,
1782        ));
1783        for filters in [[&hash_in, &hash_eq], [&hash_eq, &hash_in]] {
1784            let owned: Vec<Expr> = filters.iter().map(|e| (*e).clone()).collect();
1785            let pred = QueryPredicate::from_filters(&owned, &model);
1786            assert!(!pred.contradiction);
1787            assert!(matches!(
1788                pred.constraints.get(&hash_idx),
1789                Some(PredicateConstraint::FixedBinaryEq(v)) if v == &vec![0xAA, 0xAA]
1790            ));
1791        }
1792    }
1793
1794    #[test]
1795    fn empty_and_null_in_lists_are_exact() {
1796        use datafusion::logical_expr::{col, in_list};
1797        let config = KvTableConfig::new(
1798            0,
1799            vec![
1800                TableColumnConfig::new("id", DataType::Int64, false),
1801                TableColumnConfig::new("label", DataType::Utf8, true),
1802                TableColumnConfig::new("score", DataType::Float64, true),
1803                TableColumnConfig::new("version", DataType::UInt64, true),
1804                TableColumnConfig::new("hash", DataType::FixedSizeBinary(2), true),
1805            ],
1806            vec!["id".to_string()],
1807            vec![],
1808        )
1809        .unwrap();
1810        let model = TableModel::from_config(&config).unwrap();
1811        let id_idx = *model.columns_by_name.get("id").unwrap();
1812
1813        for filter in [
1814            in_list(col("id"), vec![], false),
1815            in_list(
1816                col("id"),
1817                vec![Expr::Literal(ScalarValue::Int64(None), None)],
1818                false,
1819            ),
1820            in_list(
1821                col("label"),
1822                vec![Expr::Literal(ScalarValue::Utf8(None), None)],
1823                false,
1824            ),
1825            in_list(
1826                col("score"),
1827                vec![Expr::Literal(ScalarValue::Float64(None), None)],
1828                false,
1829            ),
1830            in_list(
1831                col("version"),
1832                vec![Expr::Literal(ScalarValue::UInt64(None), None)],
1833                false,
1834            ),
1835            in_list(
1836                col("hash"),
1837                vec![Expr::Literal(ScalarValue::FixedSizeBinary(2, None), None)],
1838                false,
1839            ),
1840        ] {
1841            assert!(QueryPredicate::supports_filter(&filter, &model));
1842            assert!(QueryPredicate::from_filters(&[filter], &model).contradiction);
1843        }
1844
1845        let filter = in_list(
1846            col("id"),
1847            vec![
1848                Expr::Literal(ScalarValue::Int64(Some(7)), None),
1849                Expr::Literal(ScalarValue::Int64(None), None),
1850            ],
1851            false,
1852        );
1853        assert!(QueryPredicate::supports_filter(&filter, &model));
1854        let pred = QueryPredicate::from_filters(&[filter], &model);
1855        assert!(!pred.contradiction);
1856        assert!(matches!(
1857            pred.constraints.get(&id_idx),
1858            Some(PredicateConstraint::IntRange {
1859                min: Some(7),
1860                max: Some(7)
1861            })
1862        ));
1863    }
1864
1865    #[test]
1866    fn table_config_accepts_date32_column() {
1867        let config = KvTableConfig::new(
1868            0,
1869            vec![
1870                TableColumnConfig::new("id", DataType::Int64, false),
1871                TableColumnConfig::new("created", DataType::Date32, false),
1872            ],
1873            vec!["id".to_string()],
1874            vec![],
1875        )
1876        .expect("Date32 column should be accepted");
1877        assert_eq!(config.columns.len(), 2);
1878    }
1879
1880    #[test]
1881    fn table_config_accepts_timestamp_column() {
1882        let config = KvTableConfig::new(
1883            0,
1884            vec![
1885                TableColumnConfig::new("id", DataType::Int64, false),
1886                TableColumnConfig::new(
1887                    "ts",
1888                    DataType::Timestamp(TimeUnit::Microsecond, None),
1889                    false,
1890                ),
1891            ],
1892            vec!["id".to_string()],
1893            vec![],
1894        )
1895        .expect("Timestamp column should be accepted");
1896        let schema = config.to_schema();
1897        assert!(matches!(
1898            schema.field(1).data_type(),
1899            DataType::Timestamp(TimeUnit::Microsecond, _)
1900        ));
1901    }
1902
1903    #[test]
1904    fn table_config_normalizes_timestamp_to_microsecond() {
1905        let config = KvTableConfig::new(
1906            0,
1907            vec![
1908                TableColumnConfig::new("id", DataType::Int64, false),
1909                TableColumnConfig::new(
1910                    "ts",
1911                    DataType::Timestamp(TimeUnit::Nanosecond, None),
1912                    false,
1913                ),
1914            ],
1915            vec!["id".to_string()],
1916            vec![],
1917        )
1918        .expect("Nanosecond timestamp should be accepted");
1919        let schema = config.to_schema();
1920        assert!(matches!(
1921            schema.field(1).data_type(),
1922            DataType::Timestamp(TimeUnit::Microsecond, _)
1923        ));
1924    }
1925
1926    #[test]
1927    fn table_config_accepts_decimal128_column() {
1928        let config = KvTableConfig::new(
1929            0,
1930            vec![
1931                TableColumnConfig::new("id", DataType::Int64, false),
1932                TableColumnConfig::new("price", DataType::Decimal128(10, 2), false),
1933            ],
1934            vec!["id".to_string()],
1935            vec![],
1936        )
1937        .expect("Decimal128 column should be accepted");
1938        assert_eq!(config.columns.len(), 2);
1939    }
1940
1941    #[test]
1942    fn table_config_accepts_list_column() {
1943        use datafusion::arrow::datatypes::Field;
1944
1945        let config = KvTableConfig::new(
1946            0,
1947            vec![
1948                TableColumnConfig::new("id", DataType::Int64, false),
1949                TableColumnConfig::new(
1950                    "tags",
1951                    DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))),
1952                    false,
1953                ),
1954            ],
1955            vec!["id".to_string()],
1956            vec![],
1957        )
1958        .expect("List<Utf8> column should be accepted");
1959        assert_eq!(config.columns.len(), 2);
1960    }
1961
1962    #[test]
1963    fn list_column_rejected_in_index() {
1964        use datafusion::arrow::datatypes::Field;
1965
1966        let result = KvTableConfig::new(
1967            0,
1968            vec![
1969                TableColumnConfig::new("id", DataType::Int64, false),
1970                TableColumnConfig::new(
1971                    "tags",
1972                    DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))),
1973                    false,
1974                ),
1975            ],
1976            vec!["id".to_string()],
1977            vec![IndexSpec::new("tags_idx", vec!["tags".to_string()]).unwrap()],
1978        );
1979        assert!(
1980            result.is_err() || {
1981                let config = result.unwrap();
1982                let model = TableModel::from_config(&config).unwrap();
1983                model.resolve_index_specs(&config.index_specs).is_err()
1984            }
1985        );
1986    }
1987
1988    #[test]
1989    fn i32_ordered_encoding_round_trip() {
1990        let values = [i32::MIN, -1000, -1, 0, 1, 1000, i32::MAX];
1991        for v in values {
1992            assert_eq!(decode_i32_ordered(encode_i32_ordered(v)), v);
1993        }
1994        let encoded: Vec<[u8; 4]> = values.iter().map(|v| encode_i32_ordered(*v)).collect();
1995        for i in 0..encoded.len() - 1 {
1996            assert!(encoded[i] < encoded[i + 1]);
1997        }
1998    }
1999
2000    #[test]
2001    fn i128_ordered_encoding_round_trip() {
2002        let values = [i128::MIN, -1, 0, 1, 1234567890123456789, i128::MAX];
2003        for v in values {
2004            assert_eq!(decode_i128_ordered(encode_i128_ordered(v)), v);
2005        }
2006        let encoded: Vec<[u8; 16]> = values.iter().map(|v| encode_i128_ordered(*v)).collect();
2007        for i in 0..encoded.len() - 1 {
2008            assert!(encoded[i] < encoded[i + 1]);
2009        }
2010    }
2011
2012    fn extended_model() -> (TableModel, Vec<ResolvedIndexSpec>) {
2013        let config = KvTableConfig::new(
2014            0,
2015            vec![
2016                TableColumnConfig::new("id", DataType::Int64, false),
2017                TableColumnConfig::new("created", DataType::Date32, false),
2018                TableColumnConfig::new(
2019                    "ts",
2020                    DataType::Timestamp(TimeUnit::Microsecond, None),
2021                    false,
2022                ),
2023                TableColumnConfig::new("price", DataType::Decimal128(10, 2), false),
2024                TableColumnConfig::new("label", DataType::Utf8, false),
2025            ],
2026            vec!["id".to_string()],
2027            vec![
2028                IndexSpec::new(
2029                    "date_label",
2030                    vec!["created".to_string(), "label".to_string()],
2031                )
2032                .expect("valid"),
2033                IndexSpec::new("price_idx", vec!["price".to_string()]).expect("valid"),
2034            ],
2035        )
2036        .expect("valid config");
2037        let model = TableModel::from_config(&config).expect("model");
2038        let specs = model
2039            .resolve_index_specs(&config.index_specs)
2040            .expect("specs");
2041        (model, specs)
2042    }
2043
2044    #[test]
2045    fn secondary_index_key_round_trip_date32_and_decimal128() {
2046        let (model, specs) = extended_model();
2047        let row = KvRow {
2048            values: vec![
2049                CellValue::Int64(42),
2050                CellValue::Date32(19000),
2051                CellValue::Timestamp(1_700_000_000_000_000),
2052                CellValue::Decimal128(123456),
2053                CellValue::Utf8("hello".to_string()),
2054            ],
2055        };
2056        let key = encode_secondary_index_key(model.table_prefix, &specs[0], &model, &row)
2057            .expect("encode");
2058        let decoded = decode_secondary_index_key(model.table_prefix, &specs[0], &model, &key)
2059            .expect("decode");
2060        let created_idx = *model.columns_by_name.get("created").unwrap();
2061        let label_idx = *model.columns_by_name.get("label").unwrap();
2062        assert!(matches!(
2063            decoded.values.get(&created_idx),
2064            Some(CellValue::Date32(19000))
2065        ));
2066        assert!(matches!(
2067            decoded.values.get(&label_idx),
2068            Some(CellValue::Utf8(v)) if v == "hello"
2069        ));
2070        assert!(matches!(
2071            &decoded.primary_key_values[0],
2072            CellValue::Int64(42)
2073        ));
2074
2075        let key2 = encode_secondary_index_key(model.table_prefix, &specs[1], &model, &row)
2076            .expect("encode");
2077        let decoded2 = decode_secondary_index_key(model.table_prefix, &specs[1], &model, &key2)
2078            .expect("decode");
2079        let price_idx = *model.columns_by_name.get("price").unwrap();
2080        assert!(matches!(
2081            decoded2.values.get(&price_idx),
2082            Some(CellValue::Decimal128(123456))
2083        ));
2084        assert!(matches!(
2085            &decoded2.primary_key_values[0],
2086            CellValue::Int64(42)
2087        ));
2088    }
2089
2090    #[test]
2091    fn base_row_round_trip_with_date32_timestamp_decimal128() {
2092        let (model, _specs) = extended_model();
2093        let row = KvRow {
2094            values: vec![
2095                CellValue::Int64(7),
2096                CellValue::Date32(19500),
2097                CellValue::Timestamp(1_700_000_000_000_000),
2098                CellValue::Decimal128(-9876543),
2099                CellValue::Utf8("world".to_string()),
2100            ],
2101        };
2102        let encoded = encode_base_row_value(&row, &model).expect("encode");
2103        let decoded = decode_base_row(vec![CellValue::Int64(7)], &encoded, &model).expect("decode");
2104        assert!(matches!(&decoded.values[0], CellValue::Int64(7)));
2105        assert!(matches!(&decoded.values[1], CellValue::Date32(19500)));
2106        assert!(matches!(
2107            &decoded.values[2],
2108            CellValue::Timestamp(1_700_000_000_000_000)
2109        ));
2110        assert!(matches!(
2111            &decoded.values[3],
2112            CellValue::Decimal128(-9876543)
2113        ));
2114        assert!(matches!(&decoded.values[4], CellValue::Utf8(v) if v == "world"));
2115    }
2116
2117    #[test]
2118    fn base_row_round_trip_with_list() {
2119        use datafusion::arrow::datatypes::Field;
2120
2121        let config = KvTableConfig::new(
2122            0,
2123            vec![
2124                TableColumnConfig::new("id", DataType::Int64, false),
2125                TableColumnConfig::new(
2126                    "tags",
2127                    DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))),
2128                    false,
2129                ),
2130                TableColumnConfig::new(
2131                    "scores",
2132                    DataType::List(Arc::new(Field::new("item", DataType::Int64, false))),
2133                    false,
2134                ),
2135            ],
2136            vec!["id".to_string()],
2137            vec![],
2138        )
2139        .expect("valid");
2140        let model = TableModel::from_config(&config).expect("model");
2141        let row = KvRow {
2142            values: vec![
2143                CellValue::Int64(1),
2144                CellValue::List(vec![
2145                    CellValue::Utf8("a".to_string()),
2146                    CellValue::Utf8("b".to_string()),
2147                ]),
2148                CellValue::List(vec![CellValue::Int64(10), CellValue::Int64(20)]),
2149            ],
2150        };
2151        let encoded = encode_base_row_value(&row, &model).expect("encode");
2152        let decoded = decode_base_row(vec![CellValue::Int64(1)], &encoded, &model).expect("decode");
2153        assert!(matches!(&decoded.values[0], CellValue::Int64(1)));
2154        match &decoded.values[1] {
2155            CellValue::List(items) => {
2156                assert_eq!(items.len(), 2);
2157                assert!(matches!(&items[0], CellValue::Utf8(v) if v == "a"));
2158                assert!(matches!(&items[1], CellValue::Utf8(v) if v == "b"));
2159            }
2160            _ => panic!("expected List"),
2161        }
2162        match &decoded.values[2] {
2163            CellValue::List(items) => {
2164                assert_eq!(items.len(), 2);
2165                assert!(matches!(&items[0], CellValue::Int64(10)));
2166                assert!(matches!(&items[1], CellValue::Int64(20)));
2167            }
2168            _ => panic!("expected List"),
2169        }
2170    }
2171
2172    #[test]
2173    fn decimal128_constraint_range() {
2174        let mut min: Option<i128> = None;
2175        let mut max: Option<i128> = None;
2176        let mut contradiction = false;
2177        apply_decimal128_constraint(&mut min, &mut max, Operator::GtEq, 100, &mut contradiction);
2178        assert!(!contradiction);
2179        apply_decimal128_constraint(&mut min, &mut max, Operator::LtEq, 200, &mut contradiction);
2180        assert!(!contradiction);
2181        assert_eq!(min, Some(100));
2182        assert_eq!(max, Some(200));
2183        assert!(in_i128_bounds(150, min, max));
2184        assert!(!in_i128_bounds(99, min, max));
2185        assert!(!in_i128_bounds(201, min, max));
2186    }
2187
2188    #[test]
2189    fn decimal256_gt_max_is_contradiction() {
2190        let mut min: Option<i256> = None;
2191        let mut max: Option<i256> = None;
2192        let mut contradiction = false;
2193        apply_i256_constraint(
2194            &mut min,
2195            &mut max,
2196            Operator::Gt,
2197            i256::MAX,
2198            &mut contradiction,
2199        );
2200        assert!(contradiction);
2201        assert_eq!(min, None);
2202        assert_eq!(max, None);
2203    }
2204
2205    #[test]
2206    fn decimal256_lt_min_is_contradiction() {
2207        let mut min: Option<i256> = None;
2208        let mut max: Option<i256> = None;
2209        let mut contradiction = false;
2210        apply_i256_constraint(
2211            &mut min,
2212            &mut max,
2213            Operator::Lt,
2214            i256::MIN,
2215            &mut contradiction,
2216        );
2217        assert!(contradiction);
2218        assert_eq!(min, None);
2219        assert_eq!(max, None);
2220    }
2221
2222    #[test]
2223    fn date32_index_bound_clamps_on_i64_overflow() {
2224        let config = KvTableConfig::new(
2225            0,
2226            vec![
2227                TableColumnConfig::new("id", DataType::Int64, false),
2228                TableColumnConfig::new("created", DataType::Date32, false),
2229            ],
2230            vec!["id".to_string()],
2231            vec![IndexSpec::new("created_idx", vec!["created".to_string()]).unwrap()],
2232        )
2233        .unwrap();
2234        let model = TableModel::from_config(&config).unwrap();
2235        let specs = model.resolve_index_specs(&config.index_specs).unwrap();
2236
2237        let created_idx = *model.columns_by_name.get("created").unwrap();
2238        let mut pred = QueryPredicate::default();
2239        pred.constraints.insert(
2240            created_idx,
2241            PredicateConstraint::IntRange {
2242                min: Some(i32::MAX as i64 + 1),
2243                max: None,
2244            },
2245        );
2246
2247        let start = pred
2248            .encode_index_bound_key(model.table_prefix, &model, &specs[0], 1, false)
2249            .unwrap();
2250        let end = pred
2251            .encode_index_bound_key(model.table_prefix, &model, &specs[0], 1, true)
2252            .unwrap();
2253
2254        assert!(
2255            start <= end,
2256            "lower bound must not exceed upper bound (was wrapping via as i32)"
2257        );
2258
2259        let encoded_lower = specs[0].codec.read_payload_exact::<4>(&start, 0).unwrap();
2260        let decoded_lower = decode_i32_ordered(encoded_lower);
2261        assert_eq!(
2262            decoded_lower,
2263            i32::MAX,
2264            "out-of-range i64 must clamp to i32::MAX, not wrap"
2265        );
2266    }
2267
2268    #[test]
2269    fn timestamp_nanos_gt_uses_floor_division() {
2270        let micros = timestamp_scalar_to_micros_for_op(
2271            &ScalarValue::TimestampNanosecond(Some(-1500), None),
2272            Operator::Gt,
2273        )
2274        .unwrap();
2275        assert_eq!(micros, -2, "Gt on -1500ns should floor to -2us");
2276
2277        let mut min: Option<i64> = None;
2278        let mut max: Option<i64> = None;
2279        let mut contradiction = false;
2280        apply_int_constraint(&mut min, &mut max, Operator::Gt, micros, &mut contradiction);
2281        assert_eq!(min, Some(-1), "Gt(-2us) + 1 = min -1us");
2282
2283        let row_at_minus_1 = CellValue::Timestamp(-1);
2284        assert!(matches_constraint(
2285            &row_at_minus_1,
2286            &PredicateConstraint::IntRange { min, max }
2287        ));
2288    }
2289
2290    #[test]
2291    fn timestamp_nanos_lteq_uses_floor_division() {
2292        let micros = timestamp_scalar_to_micros_for_op(
2293            &ScalarValue::TimestampNanosecond(Some(-1500), None),
2294            Operator::LtEq,
2295        )
2296        .unwrap();
2297        assert_eq!(micros, -2, "LtEq on -1500ns should floor to -2us");
2298
2299        let row_at_minus_1 = CellValue::Timestamp(-1);
2300        assert!(
2301            !matches_constraint(
2302                &row_at_minus_1,
2303                &PredicateConstraint::IntRange {
2304                    min: None,
2305                    max: Some(micros)
2306                }
2307            ),
2308            "-1us (-1000ns) > -1500ns, must not satisfy <= -1500ns"
2309        );
2310    }
2311
2312    #[test]
2313    fn timestamp_nanos_gteq_uses_ceil_division() {
2314        let micros = timestamp_scalar_to_micros_for_op(
2315            &ScalarValue::TimestampNanosecond(Some(-1500), None),
2316            Operator::GtEq,
2317        )
2318        .unwrap();
2319        assert_eq!(micros, -1, "GtEq on -1500ns should ceil to -1us");
2320    }
2321
2322    #[test]
2323    fn timestamp_nanos_lt_uses_ceil_division() {
2324        let micros = timestamp_scalar_to_micros_for_op(
2325            &ScalarValue::TimestampNanosecond(Some(-1500), None),
2326            Operator::Lt,
2327        )
2328        .unwrap();
2329        assert_eq!(micros, -1, "Lt on -1500ns should ceil to -1us");
2330
2331        let mut min: Option<i64> = None;
2332        let mut max: Option<i64> = None;
2333        let mut contradiction = false;
2334        apply_int_constraint(&mut min, &mut max, Operator::Lt, micros, &mut contradiction);
2335        assert_eq!(max, Some(-2), "Lt(-1us) - 1 = max -2us");
2336    }
2337
2338    #[test]
2339    fn timestamp_nanos_eq_non_aligned_is_contradiction() {
2340        let result = timestamp_scalar_to_micros_for_op(
2341            &ScalarValue::TimestampNanosecond(Some(-1500), None),
2342            Operator::Eq,
2343        );
2344        assert!(
2345            result.is_none(),
2346            "non-aligned ns Eq must produce contradiction"
2347        );
2348    }
2349
2350    #[test]
2351    fn timestamp_nanos_exact_multiple_is_unchanged() {
2352        for op in [
2353            Operator::Eq,
2354            Operator::Gt,
2355            Operator::GtEq,
2356            Operator::Lt,
2357            Operator::LtEq,
2358        ] {
2359            let micros = timestamp_scalar_to_micros_for_op(
2360                &ScalarValue::TimestampNanosecond(Some(-2000), None),
2361                op,
2362            )
2363            .unwrap();
2364            assert_eq!(micros, -2, "exact multiple -2000ns = -2us for {op:?}");
2365        }
2366    }
2367
2368    #[test]
2369    fn float64_index_bounds_include_infinity() {
2370        let config = KvTableConfig::new(
2371            0,
2372            vec![
2373                TableColumnConfig::new("id", DataType::Int64, false),
2374                TableColumnConfig::new("val", DataType::Float64, false),
2375            ],
2376            vec!["id".to_string()],
2377            vec![IndexSpec::new("val_idx", vec!["val".to_string()]).unwrap()],
2378        )
2379        .unwrap();
2380        let model = TableModel::from_config(&config).unwrap();
2381        let specs = model.resolve_index_specs(&config.index_specs).unwrap();
2382
2383        let pred = QueryPredicate::default();
2384        let start = pred
2385            .encode_index_bound_key(model.table_prefix, &model, &specs[0], 0, false)
2386            .unwrap();
2387        let end = pred
2388            .encode_index_bound_key(model.table_prefix, &model, &specs[0], 0, true)
2389            .unwrap();
2390
2391        let neg_inf_row = KvRow {
2392            values: vec![CellValue::Int64(1), CellValue::Float64(f64::NEG_INFINITY)],
2393        };
2394        let pos_inf_row = KvRow {
2395            values: vec![CellValue::Int64(2), CellValue::Float64(f64::INFINITY)],
2396        };
2397
2398        let neg_inf_key =
2399            encode_secondary_index_key(model.table_prefix, &specs[0], &model, &neg_inf_row)
2400                .unwrap();
2401        let pos_inf_key =
2402            encode_secondary_index_key(model.table_prefix, &specs[0], &model, &pos_inf_row)
2403                .unwrap();
2404
2405        assert!(
2406            neg_inf_key >= start,
2407            "NEG_INFINITY row key must be within scan start bound"
2408        );
2409        assert!(
2410            pos_inf_key <= end,
2411            "INFINITY row key must be within scan end bound"
2412        );
2413    }
2414
2415    #[test]
2416    fn distinct_table_prefixes_produce_non_overlapping_pk_ranges() {
2417        let range_a = primary_key_prefix_range(1);
2418        let range_b = primary_key_prefix_range(2);
2419        assert!(
2420            range_a.end < range_b.start,
2421            "table prefix 1 pk range must be entirely below table prefix 2"
2422        );
2423    }
2424
2425    #[test]
2426    fn distinct_table_prefixes_isolate_primary_keys() {
2427        let model_1 = simple_int64_model(1);
2428        let model_2 = simple_int64_model(2);
2429        let pk = CellValue::Int64(42);
2430        let key_a = encode_primary_key(1, &[&pk], &model_1).expect("pk key encodes");
2431        let key_b = encode_primary_key(2, &[&pk], &model_2).expect("pk key encodes");
2432        assert_ne!(key_a, key_b, "same PK under different prefixes must differ");
2433        assert!(
2434            decode_primary_key(1, &key_a, &model_1).is_some(),
2435            "key_a must decode under prefix 1"
2436        );
2437        assert!(
2438            decode_primary_key(2, &key_a, &model_2).is_none(),
2439            "key_a must NOT decode under prefix 2"
2440        );
2441        assert!(
2442            decode_primary_key(2, &key_b, &model_2).is_some(),
2443            "key_b must decode under prefix 2"
2444        );
2445    }
2446
2447    #[test]
2448    fn distinct_table_prefixes_isolate_secondary_keys() {
2449        let config_a = KvTableConfig::new(
2450            10,
2451            vec![
2452                TableColumnConfig::new("id", DataType::Int64, false),
2453                TableColumnConfig::new("name", DataType::Utf8, false),
2454            ],
2455            vec!["id".to_string()],
2456            vec![IndexSpec::new("name_idx", vec!["name".to_string()]).unwrap()],
2457        )
2458        .unwrap();
2459        let config_b = KvTableConfig::new(
2460            11,
2461            vec![
2462                TableColumnConfig::new("id", DataType::Int64, false),
2463                TableColumnConfig::new("name", DataType::Utf8, false),
2464            ],
2465            vec!["id".to_string()],
2466            vec![IndexSpec::new("name_idx", vec!["name".to_string()]).unwrap()],
2467        )
2468        .unwrap();
2469
2470        let model_a = TableModel::from_config(&config_a).unwrap();
2471        let specs_a = model_a.resolve_index_specs(&config_a.index_specs).unwrap();
2472        let model_b = TableModel::from_config(&config_b).unwrap();
2473        let specs_b = model_b.resolve_index_specs(&config_b.index_specs).unwrap();
2474
2475        let row = KvRow {
2476            values: vec![CellValue::Int64(1), CellValue::Utf8("alice".to_string())],
2477        };
2478        let key_a =
2479            encode_secondary_index_key(model_a.table_prefix, &specs_a[0], &model_a, &row).unwrap();
2480        let key_b =
2481            encode_secondary_index_key(model_b.table_prefix, &specs_b[0], &model_b, &row).unwrap();
2482
2483        assert_ne!(
2484            key_a, key_b,
2485            "same row under different prefixes must differ"
2486        );
2487        assert!(
2488            decode_secondary_index_key(model_a.table_prefix, &specs_a[0], &model_a, &key_a)
2489                .is_some()
2490        );
2491        assert!(
2492            decode_secondary_index_key(model_a.table_prefix, &specs_a[0], &model_a, &key_b)
2493                .is_none(),
2494            "key from table B must not decode under table A's prefix"
2495        );
2496    }
2497
2498    #[test]
2499    fn table_prefix_stored_in_model() {
2500        let config = KvTableConfig::new(
2501            12,
2502            vec![TableColumnConfig::new("id", DataType::Int64, false)],
2503            vec!["id".to_string()],
2504            vec![],
2505        )
2506        .unwrap();
2507        let model = TableModel::from_config(&config).unwrap();
2508        assert_eq!(model.table_prefix, 12);
2509    }
2510
2511    #[test]
2512    fn codec_layout_exposes_payload_bits_under_reserved_family_bits() {
2513        let config = KvTableConfig::new(
2514            0,
2515            vec![
2516                TableColumnConfig::new("id", DataType::FixedSizeBinary(16), false),
2517                TableColumnConfig::new("bucket", DataType::FixedSizeBinary(16), false),
2518            ],
2519            vec!["id".to_string()],
2520            vec![IndexSpec::new("bucket_idx", vec!["bucket".to_string()]).unwrap()],
2521        )
2522        .unwrap();
2523        let model = TableModel::from_config(&config).unwrap();
2524        let spec = model
2525            .resolve_index_specs(&config.index_specs)
2526            .unwrap()
2527            .remove(0);
2528
2529        let mut current_primary = HashSet::new();
2530        let mut current_secondary = HashSet::new();
2531
2532        fn first_twelve_bits_of_key(key: &[u8]) -> u16 {
2533            let first = u16::from(*key.first().unwrap_or(&0));
2534            let second = u16::from(*key.get(1).unwrap_or(&0));
2535            (first << 4) | (second >> 4)
2536        }
2537
2538        for first_byte in 0u8..=255 {
2539            let mut id = vec![0u8; 16];
2540            id[0] = first_byte;
2541            let mut bucket = vec![0u8; 16];
2542            bucket[0] = first_byte;
2543
2544            let pk = CellValue::FixedBinary(id.clone());
2545            let current_pk = encode_primary_key(model.table_prefix, &[&pk], &model).unwrap();
2546            current_primary.insert(first_twelve_bits_of_key(&current_pk));
2547
2548            let row = KvRow {
2549                values: vec![
2550                    CellValue::FixedBinary(id),
2551                    CellValue::FixedBinary(bucket.clone()),
2552                ],
2553            };
2554            let current_index =
2555                encode_secondary_index_key(model.table_prefix, &spec, &model, &row).unwrap();
2556            current_secondary.insert(first_twelve_bits_of_key(&current_index));
2557        }
2558
2559        // Primary keys reserve 5 high bits for family, leaving 7 payload bits in the first
2560        // 12 bits of the physical key. Varying one payload byte therefore spans 2^7 values.
2561        assert_eq!(current_primary.len(), 128);
2562
2563        // Secondary index keys reserve 9 high bits for family, leaving 3 payload bits in the
2564        // first 12 bits. Varying one payload byte therefore spans 2^3 values.
2565        assert_eq!(current_secondary.len(), 8);
2566    }
2567
2568    #[test]
2569    fn kv_schema_auto_assigns_sequential_prefixes() {
2570        let client = StoreClient::new("http://localhost:10000");
2571        let schema = KvSchema::new(client)
2572            .table(
2573                "alpha",
2574                vec![TableColumnConfig::new("id", DataType::Int64, false)],
2575                vec!["id".to_string()],
2576                vec![],
2577            )
2578            .unwrap()
2579            .table(
2580                "beta",
2581                vec![
2582                    TableColumnConfig::new("id", DataType::Int64, false),
2583                    TableColumnConfig::new("name", DataType::Utf8, false),
2584                ],
2585                vec!["id".to_string()],
2586                vec![],
2587            )
2588            .unwrap()
2589            .table(
2590                "gamma",
2591                vec![TableColumnConfig::new("id", DataType::Int64, false)],
2592                vec!["id".to_string()],
2593                vec![],
2594            )
2595            .unwrap();
2596
2597        assert_eq!(schema.table_count(), 3);
2598    }
2599
2600    #[test]
2601    fn kv_schema_allows_max_codec_table_count_and_rejects_overflow() {
2602        let client = StoreClient::new("http://localhost:10000");
2603        let mut schema = KvSchema::new(client);
2604        for idx in 0..MAX_TABLES {
2605            schema = schema
2606                .table(
2607                    format!("t{idx}"),
2608                    vec![TableColumnConfig::new("id", DataType::Int64, false)],
2609                    vec!["id".to_string()],
2610                    vec![],
2611                )
2612                .expect("tables up to codec capacity should be accepted");
2613        }
2614        assert_eq!(schema.table_count(), MAX_TABLES);
2615
2616        let overflow = schema.table(
2617            "overflow",
2618            vec![TableColumnConfig::new("id", DataType::Int64, false)],
2619            vec!["id".to_string()],
2620            vec![],
2621        );
2622        match overflow {
2623            Ok(_) => panic!("overflow table should be rejected"),
2624            Err(err) => assert!(
2625                err.contains(&format!(
2626                    "too many tables for codec layout (max {MAX_TABLES})"
2627                )),
2628                "overflow table should be rejected with codec-capacity error"
2629            ),
2630        }
2631    }
2632
2633    #[test]
2634    fn sequential_prefixes_produce_non_overlapping_pk_ranges() {
2635        let range_a = primary_key_prefix_range(0);
2636        let range_b = primary_key_prefix_range(1);
2637        let range_c = primary_key_prefix_range(2);
2638        assert!(range_a.end < range_b.start);
2639        assert!(range_b.end < range_c.start);
2640    }
2641
2642    #[test]
2643    fn sequential_prefixes_isolate_primary_keys() {
2644        let model_0 = simple_int64_model(0);
2645        let model_1 = simple_int64_model(1);
2646        let pk = CellValue::Int64(42);
2647        let key_a = encode_primary_key(0, &[&pk], &model_0).expect("pk key encodes");
2648        let key_b = encode_primary_key(1, &[&pk], &model_1).expect("pk key encodes");
2649        assert_ne!(key_a, key_b);
2650        assert!(decode_primary_key(0, &key_a, &model_0).is_some());
2651        assert!(decode_primary_key(1, &key_a, &model_1).is_none());
2652        assert!(decode_primary_key(0, &key_b, &model_0).is_none());
2653        assert!(decode_primary_key(1, &key_b, &model_1).is_some());
2654    }
2655
2656    #[test]
2657    fn sequential_prefixes_isolate_secondary_keys() {
2658        let config_a = KvTableConfig::new(
2659            0,
2660            vec![
2661                TableColumnConfig::new("id", DataType::Int64, false),
2662                TableColumnConfig::new("name", DataType::Utf8, false),
2663            ],
2664            vec!["id".to_string()],
2665            vec![IndexSpec::new("name_idx", vec!["name".to_string()]).unwrap()],
2666        )
2667        .unwrap();
2668        let config_b = KvTableConfig::new(
2669            1,
2670            vec![
2671                TableColumnConfig::new("id", DataType::Int64, false),
2672                TableColumnConfig::new("name", DataType::Utf8, false),
2673            ],
2674            vec!["id".to_string()],
2675            vec![IndexSpec::new("name_idx", vec!["name".to_string()]).unwrap()],
2676        )
2677        .unwrap();
2678
2679        let model_a = TableModel::from_config(&config_a).unwrap();
2680        let specs_a = model_a.resolve_index_specs(&config_a.index_specs).unwrap();
2681        let model_b = TableModel::from_config(&config_b).unwrap();
2682        let specs_b = model_b.resolve_index_specs(&config_b.index_specs).unwrap();
2683
2684        let row = KvRow {
2685            values: vec![CellValue::Int64(1), CellValue::Utf8("alice".to_string())],
2686        };
2687        let key_a =
2688            encode_secondary_index_key(model_a.table_prefix, &specs_a[0], &model_a, &row).unwrap();
2689        let key_b =
2690            encode_secondary_index_key(model_b.table_prefix, &specs_b[0], &model_b, &row).unwrap();
2691        assert_ne!(key_a, key_b);
2692        assert!(
2693            decode_secondary_index_key(model_a.table_prefix, &specs_a[0], &model_a, &key_b)
2694                .is_none(),
2695            "key from prefix 1 must not decode under prefix 0"
2696        );
2697    }
2698
2699    #[tokio::test]
2700    async fn kv_schema_register_all_enables_join() {
2701        let ctx = SessionContext::new();
2702        let client = StoreClient::new("http://localhost:10000");
2703
2704        let result = KvSchema::new(client)
2705            .table(
2706                "customers",
2707                vec![
2708                    TableColumnConfig::new("customer_id", DataType::Int64, false),
2709                    TableColumnConfig::new("name", DataType::Utf8, false),
2710                ],
2711                vec!["customer_id".to_string()],
2712                vec![],
2713            )
2714            .unwrap()
2715            .table(
2716                "orders",
2717                vec![
2718                    TableColumnConfig::new("order_id", DataType::Int64, false),
2719                    TableColumnConfig::new("customer_id", DataType::Int64, false),
2720                    TableColumnConfig::new("amount", DataType::Int64, false),
2721                ],
2722                vec!["order_id".to_string()],
2723                vec![IndexSpec::new("cust_idx", vec!["customer_id".to_string()]).unwrap()],
2724            )
2725            .unwrap()
2726            .register_all(&ctx);
2727
2728        assert!(
2729            result.is_ok(),
2730            "register_all must succeed: {:?}",
2731            result.err()
2732        );
2733
2734        let plan = ctx
2735            .sql(
2736                "SELECT c.name, o.order_id, o.amount \
2737                 FROM orders o \
2738                 JOIN customers c ON o.customer_id = c.customer_id",
2739            )
2740            .await;
2741        assert!(
2742            plan.is_ok(),
2743            "JOIN query must plan successfully: {:?}",
2744            plan.err()
2745        );
2746    }
2747
2748    #[tokio::test]
2749    async fn kv_schema_three_way_join() {
2750        let ctx = SessionContext::new();
2751        let client = StoreClient::new("http://localhost:10000");
2752
2753        KvSchema::new(client)
2754            .table(
2755                "products",
2756                vec![
2757                    TableColumnConfig::new("product_id", DataType::Int64, false),
2758                    TableColumnConfig::new("name", DataType::Utf8, false),
2759                    TableColumnConfig::new("price", DataType::Int64, false),
2760                ],
2761                vec!["product_id".to_string()],
2762                vec![],
2763            )
2764            .unwrap()
2765            .table(
2766                "line_items",
2767                vec![
2768                    TableColumnConfig::new("item_id", DataType::Int64, false),
2769                    TableColumnConfig::new("order_id", DataType::Int64, false),
2770                    TableColumnConfig::new("product_id", DataType::Int64, false),
2771                    TableColumnConfig::new("qty", DataType::Int64, false),
2772                ],
2773                vec!["item_id".to_string()],
2774                vec![
2775                    IndexSpec::new("prod_idx", vec!["product_id".to_string()]).unwrap(),
2776                    IndexSpec::new("order_idx", vec!["order_id".to_string()]).unwrap(),
2777                ],
2778            )
2779            .unwrap()
2780            .table(
2781                "orders",
2782                vec![
2783                    TableColumnConfig::new("order_id", DataType::Int64, false),
2784                    TableColumnConfig::new("customer", DataType::Utf8, false),
2785                ],
2786                vec!["order_id".to_string()],
2787                vec![],
2788            )
2789            .unwrap()
2790            .register_all(&ctx)
2791            .unwrap();
2792
2793        let plan = ctx
2794            .sql(
2795                "SELECT o.customer, p.name, li.qty \
2796                 FROM line_items li \
2797                 JOIN products p ON li.product_id = p.product_id \
2798                 JOIN orders o ON li.order_id = o.order_id",
2799            )
2800            .await;
2801        assert!(plan.is_ok(), "three-way JOIN must plan: {:?}", plan.err());
2802    }
2803
2804    #[test]
2805    fn kv_schema_orders_table_convenience() {
2806        let client = StoreClient::new("http://localhost:10000");
2807        let schema = KvSchema::new(client)
2808            .orders_table(
2809                "my_orders",
2810                vec![IndexSpec::new(
2811                    "region_customer",
2812                    vec!["region".to_string(), "customer_id".to_string()],
2813                )
2814                .unwrap()],
2815            )
2816            .unwrap();
2817        assert_eq!(schema.table_count(), 1);
2818    }
2819
2820    #[test]
2821    fn nullable_column_accepted_in_config() {
2822        let config = KvTableConfig::new(
2823            0,
2824            vec![
2825                TableColumnConfig::new("id", DataType::Int64, false),
2826                TableColumnConfig::new("name", DataType::Utf8, true),
2827            ],
2828            vec!["id".to_string()],
2829            vec![],
2830        );
2831        assert!(config.is_ok());
2832    }
2833
2834    #[test]
2835    fn nullable_column_rejected_in_index() {
2836        let config = KvTableConfig::new(
2837            0,
2838            vec![
2839                TableColumnConfig::new("id", DataType::Int64, false),
2840                TableColumnConfig::new("name", DataType::Utf8, true),
2841            ],
2842            vec!["id".to_string()],
2843            vec![IndexSpec::new("name_idx", vec!["name".to_string()]).unwrap()],
2844        )
2845        .unwrap();
2846        let model = TableModel::from_config(&config).unwrap();
2847        let result = model.resolve_index_specs(&config.index_specs);
2848        assert!(result.is_err());
2849        assert!(result.unwrap_err().contains("nullable"));
2850    }
2851
2852    #[test]
2853    fn base_row_round_trip_with_null() {
2854        let config = KvTableConfig::new(
2855            0,
2856            vec![
2857                TableColumnConfig::new("id", DataType::Int64, false),
2858                TableColumnConfig::new("label", DataType::Utf8, true),
2859                TableColumnConfig::new("score", DataType::Int64, true),
2860            ],
2861            vec!["id".to_string()],
2862            vec![],
2863        )
2864        .unwrap();
2865        let model = TableModel::from_config(&config).unwrap();
2866        let row = KvRow {
2867            values: vec![CellValue::Int64(1), CellValue::Null, CellValue::Int64(42)],
2868        };
2869        let encoded = encode_base_row_value(&row, &model).unwrap();
2870        let decoded = decode_base_row(vec![CellValue::Int64(1)], &encoded, &model).unwrap();
2871        assert!(matches!(&decoded.values[0], CellValue::Int64(1)));
2872        assert!(matches!(&decoded.values[1], CellValue::Null));
2873        assert!(matches!(&decoded.values[2], CellValue::Int64(42)));
2874    }
2875
2876    #[test]
2877    fn null_does_not_match_equality_constraint() {
2878        assert!(!matches_constraint(
2879            &CellValue::Null,
2880            &PredicateConstraint::StringEq("x".to_string())
2881        ));
2882        assert!(!matches_constraint(
2883            &CellValue::Null,
2884            &PredicateConstraint::IntRange {
2885                min: Some(0),
2886                max: Some(10)
2887            }
2888        ));
2889    }
2890
2891    #[test]
2892    fn is_null_constraint_matches() {
2893        assert!(matches_constraint(
2894            &CellValue::Null,
2895            &PredicateConstraint::IsNull
2896        ));
2897        assert!(!matches_constraint(
2898            &CellValue::Utf8("x".to_string()),
2899            &PredicateConstraint::IsNull
2900        ));
2901        assert!(!matches_constraint(
2902            &CellValue::Null,
2903            &PredicateConstraint::IsNotNull
2904        ));
2905        assert!(matches_constraint(
2906            &CellValue::Int64(5),
2907            &PredicateConstraint::IsNotNull
2908        ));
2909    }
2910
2911    #[test]
2912    fn string_in_constraint_matches() {
2913        let constraint =
2914            PredicateConstraint::StringIn(vec!["us-east".to_string(), "us-west".to_string()]);
2915        assert!(matches_constraint(
2916            &CellValue::Utf8("us-east".to_string()),
2917            &constraint,
2918        ));
2919        assert!(matches_constraint(
2920            &CellValue::Utf8("us-west".to_string()),
2921            &constraint,
2922        ));
2923        assert!(!matches_constraint(
2924            &CellValue::Utf8("eu-central".to_string()),
2925            &constraint,
2926        ));
2927    }
2928
2929    #[test]
2930    fn int_in_constraint_matches() {
2931        let constraint = PredicateConstraint::IntIn(vec![1, 2, 3]);
2932        assert!(matches_constraint(&CellValue::Int64(1), &constraint));
2933        assert!(matches_constraint(&CellValue::Int64(3), &constraint));
2934        assert!(!matches_constraint(&CellValue::Int64(4), &constraint));
2935    }
2936
2937    #[test]
2938    fn in_predicate_generates_multiple_index_ranges() {
2939        let (model, specs) = test_model();
2940        let region_idx = *model.columns_by_name.get("region").unwrap();
2941        let mut pred = QueryPredicate::default();
2942        pred.constraints.insert(
2943            region_idx,
2944            PredicateConstraint::StringIn(vec!["us-east".to_string(), "us-west".to_string()]),
2945        );
2946        let plan = pred
2947            .choose_index_plan(&model, &specs)
2948            .expect("plan")
2949            .expect("should find index");
2950        assert_eq!(plan.ranges.len(), 2);
2951    }
2952
2953    #[test]
2954    fn int_in_generates_multiple_pk_ranges() {
2955        let (model, _specs) = test_model();
2956        let mut pred = QueryPredicate::default();
2957        pred.constraints.insert(
2958            model.primary_key_indices[0],
2959            PredicateConstraint::IntIn(vec![100, 200, 300]),
2960        );
2961        let ranges = pred.primary_key_ranges(&model).unwrap();
2962        assert_eq!(ranges.len(), 3);
2963    }
2964
2965    #[test]
2966    fn duplicate_int_in_values_deduplicated() {
2967        let (model, _specs) = test_model();
2968        // Use the PK column "order_id" for the IN list
2969        let filter = Expr::InList(datafusion::logical_expr::expr::InList {
2970            expr: Box::new(Expr::Column(datafusion::common::Column::new_unqualified(
2971                "order_id",
2972            ))),
2973            list: vec![
2974                Expr::Literal(ScalarValue::Int64(Some(5)), None),
2975                Expr::Literal(ScalarValue::Int64(Some(5)), None),
2976                Expr::Literal(ScalarValue::Int64(Some(10)), None),
2977            ],
2978            negated: false,
2979        });
2980        let pred = QueryPredicate::from_filters(&[filter], &model);
2981        let ranges = pred.primary_key_ranges(&model).unwrap();
2982        assert_eq!(
2983            ranges.len(),
2984            2,
2985            "duplicate IN values must be deduped, producing 2 ranges not 3"
2986        );
2987    }
2988
2989    #[test]
2990    fn duplicate_uint64_in_values_deduplicated() {
2991        let config = KvTableConfig::new(
2992            0,
2993            vec![
2994                TableColumnConfig::new("id", DataType::UInt64, false),
2995                TableColumnConfig::new("name", DataType::Utf8, false),
2996            ],
2997            vec!["id".to_string()],
2998            vec![],
2999        )
3000        .unwrap();
3001        let model = TableModel::from_config(&config).unwrap();
3002        let filter = Expr::InList(datafusion::logical_expr::expr::InList {
3003            expr: Box::new(Expr::Column(datafusion::common::Column::new_unqualified(
3004                "id",
3005            ))),
3006            list: vec![
3007                Expr::Literal(ScalarValue::UInt64(Some(100)), None),
3008                Expr::Literal(ScalarValue::UInt64(Some(100)), None),
3009                Expr::Literal(ScalarValue::UInt64(Some(200)), None),
3010            ],
3011            negated: false,
3012        });
3013        let pred = QueryPredicate::from_filters(&[filter], &model);
3014        let ranges = pred.primary_key_ranges(&model).unwrap();
3015        assert_eq!(
3016            ranges.len(),
3017            2,
3018            "duplicate UInt64 IN values must be deduped"
3019        );
3020    }
3021
3022    #[test]
3023    fn duplicate_fixed_binary_in_values_deduplicated() {
3024        let config = KvTableConfig::new(
3025            0,
3026            vec![
3027                TableColumnConfig::new("hash", DataType::FixedSizeBinary(16), false),
3028                TableColumnConfig::new("val", DataType::Int64, false),
3029            ],
3030            vec!["hash".to_string()],
3031            vec![],
3032        )
3033        .unwrap();
3034        let model = TableModel::from_config(&config).unwrap();
3035        let dup_val = vec![0xAA; 16];
3036        let other_val = vec![0xBB; 16];
3037        let filter = Expr::InList(datafusion::logical_expr::expr::InList {
3038            expr: Box::new(Expr::Column(datafusion::common::Column::new_unqualified(
3039                "hash",
3040            ))),
3041            list: vec![
3042                Expr::Literal(
3043                    ScalarValue::FixedSizeBinary(16, Some(dup_val.clone())),
3044                    None,
3045                ),
3046                Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(dup_val)), None),
3047                Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(other_val)), None),
3048            ],
3049            negated: false,
3050        });
3051        let pred = QueryPredicate::from_filters(&[filter], &model);
3052        let ranges = pred.primary_key_ranges(&model).unwrap();
3053        assert_eq!(
3054            ranges.len(),
3055            2,
3056            "duplicate FixedBinary IN values must be deduped"
3057        );
3058    }
3059
3060    #[test]
3061    fn or_equalities_extracted_as_in_list() {
3062        let (model, _) = test_model();
3063        let expr = Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr {
3064            left: Box::new(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr {
3065                left: Box::new(Expr::Column(datafusion::common::Column::new_unqualified(
3066                    "region",
3067                ))),
3068                op: Operator::Eq,
3069                right: Box::new(Expr::Literal(
3070                    ScalarValue::Utf8(Some("us-east".to_string())),
3071                    None,
3072                )),
3073            })),
3074            op: Operator::Or,
3075            right: Box::new(Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr {
3076                left: Box::new(Expr::Column(datafusion::common::Column::new_unqualified(
3077                    "region",
3078                ))),
3079                op: Operator::Eq,
3080                right: Box::new(Expr::Literal(
3081                    ScalarValue::Utf8(Some("us-west".to_string())),
3082                    None,
3083                )),
3084            })),
3085        });
3086        let result = extract_or_in_column(&expr, &model);
3087        assert!(result.is_some());
3088        let (col, vals) = result.unwrap();
3089        assert_eq!(col, "region");
3090        assert_eq!(vals.len(), 2);
3091    }
3092
/// An OR of two equalities on a `Float64` column must be reported as unsupported by
/// `QueryPredicate::supports_filter` and must leave `from_filters` with no constraints
/// and no contradiction.
#[test]
fn or_equalities_on_float64_are_not_pushdown_supported() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("score", DataType::Float64, false),
    ];
    let config = KvTableConfig::new(0, columns, vec!["id".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    // `score = <value>` with a Float64 literal on the right-hand side.
    let score_equals =
        |value: f64| col("score").eq(Expr::Literal(ScalarValue::Float64(Some(value)), None));
    let filter = score_equals(1.0).or(score_equals(2.0));

    let pushdown_supported = QueryPredicate::supports_filter(&filter, &model);
    assert!(
        !pushdown_supported,
        "OR-equality pushdown should be disabled for Float64 because apply_in_list cannot enforce it"
    );

    let predicate = QueryPredicate::from_filters(&[filter], &model);
    assert!(!predicate.contradiction);
    assert!(
        predicate.constraints.is_empty(),
        "unsupported OR predicate must not contribute pushdown constraints"
    );
}
3124
/// Inserts one `customers` row and two `orders` rows through a single batch writer and
/// checks the resulting number of pending entries.
#[test]
fn batch_writer_encodes_rows_across_tables() {
    let customer_columns = vec![
        TableColumnConfig::new("customer_id", DataType::Int64, false),
        TableColumnConfig::new("name", DataType::Utf8, false),
    ];
    let order_columns = vec![
        TableColumnConfig::new("order_id", DataType::Int64, false),
        TableColumnConfig::new("customer_id", DataType::Int64, false),
        TableColumnConfig::new("amount", DataType::Int64, false),
    ];
    let customer_index = IndexSpec::new("cust_idx", vec!["customer_id".to_string()]).unwrap();

    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table(
            "customers",
            customer_columns,
            vec!["customer_id".to_string()],
            vec![],
        )
        .unwrap()
        .table(
            "orders",
            order_columns,
            vec!["order_id".to_string()],
            vec![customer_index],
        )
        .unwrap();

    let mut writer = schema.batch_writer();
    writer
        .insert(
            "customers",
            vec![CellValue::Int64(1), CellValue::Utf8("Alice".to_string())],
        )
        .unwrap();

    // Both orders belong to customer 1; only the order id and amount differ.
    for (order_id, amount) in [(100, 4999), (101, 2999)] {
        writer
            .insert(
                "orders",
                vec![
                    CellValue::Int64(order_id),
                    CellValue::Int64(1),
                    CellValue::Int64(amount),
                ],
            )
            .unwrap();
    }

    // 1 customer base row + 2 order base rows + 2 order index rows = 5
    assert_eq!(writer.pending_count(), 5);
}
3182
/// Inserting into a table name that was never registered on the schema must fail with an
/// error mentioning "unknown table".
#[test]
fn batch_writer_rejects_unknown_table() {
    let id_column = TableColumnConfig::new("id", DataType::Int64, false);
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table("t1", vec![id_column], vec!["id".to_string()], vec![])
        .unwrap();

    let mut writer = schema.batch_writer();
    let outcome = writer.insert("nonexistent", vec![CellValue::Int64(1)]);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err();
    assert!(message.contains("unknown table"));
}
3200
/// Supplying one value for a two-column table must fail with an error mentioning
/// "expected 2".
#[test]
fn batch_writer_rejects_wrong_column_count() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("name", DataType::Utf8, false),
    ];
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table("t1", columns, vec!["id".to_string()], vec![])
        .unwrap();

    let mut writer = schema.batch_writer();
    // Only the `id` cell is provided; `name` is missing.
    let outcome = writer.insert("t1", vec![CellValue::Int64(1)]);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err();
    assert!(message.contains("expected 2"));
}
3221
/// A `Utf8` cell supplied for the non-key `Int64` column `amount` must be rejected by
/// `insert` with an error mentioning "type mismatch".
#[test]
fn batch_writer_rejects_non_pk_type_mismatch() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("amount", DataType::Int64, false),
    ];
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table("t1", columns, vec!["id".to_string()], vec![])
        .unwrap();

    let mut writer = schema.batch_writer();
    let mismatched_row = vec![CellValue::Int64(1), CellValue::Utf8("bad".to_string())];
    let outcome = writer.insert("t1", mismatched_row);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err();
    assert!(
        message.contains("type mismatch"),
        "non-PK schema-invalid values must be rejected at insert-time"
    );
}
3248
/// The same primary key value inserted into two different tables must yield two different
/// pending keys whose first bytes differ.
#[test]
fn batch_writer_entries_use_distinct_table_prefixes() {
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table(
            "a",
            vec![TableColumnConfig::new("id", DataType::Int64, false)],
            vec!["id".to_string()],
            vec![],
        )
        .unwrap()
        .table(
            "b",
            vec![TableColumnConfig::new("id", DataType::Int64, false)],
            vec!["id".to_string()],
            vec![],
        )
        .unwrap();

    let mut writer = schema.batch_writer();
    for table in ["a", "b"] {
        writer.insert(table, vec![CellValue::Int64(42)]).unwrap();
    }
    assert_eq!(writer.pending_count(), 2);

    let key_in_a = &writer.pending_keys[0];
    let key_in_b = &writer.pending_keys[1];
    assert_ne!(
        key_in_a, key_in_b,
        "same PK in different tables must produce different keys"
    );
    assert_ne!(key_in_a[0], key_in_b[0], "table prefix byte must differ");
}
3282
3283    #[tokio::test]
3284    async fn batch_writer_trait_failure_requeues_prepared_before_new_pending() {
3285        let client = StoreClient::new("http://localhost:10000");
3286        let schema = KvSchema::new(client)
3287            .table(
3288                "t",
3289                vec![TableColumnConfig::new("id", DataType::Int64, false)],
3290                vec!["id".to_string()],
3291                vec![],
3292            )
3293            .unwrap();
3294
3295        let mut batch = schema.batch_writer();
3296        batch.insert("t", vec![CellValue::Int64(1)]).unwrap();
3297        let prepared = batch.prepare_flush().unwrap().expect("prepared row");
3298        assert_eq!(prepared.request_id(), 0);
3299        assert_eq!(prepared.entry_count(), 1);
3300
3301        batch.insert("t", vec![CellValue::Int64(2)]).unwrap();
3302        StoreBatchUpload::mark_upload_failed(&batch, prepared, "commit failed".to_string()).await;
3303        assert_eq!(batch.pending_count(), 2);
3304
3305        let retry = batch.prepare_flush().unwrap().expect("retry row");
3306        assert_eq!(retry.request_id(), 0);
3307        assert_eq!(retry.entry_count(), 1);
3308        let next = batch.prepare_flush().unwrap().expect("new pending row");
3309        assert_eq!(next.request_id(), 1);
3310        assert_eq!(next.entry_count(), 1);
3311    }
3312
/// A `CellValue::Null` in a column declared nullable must be accepted by `insert`.
#[test]
fn batch_writer_supports_nullable_columns() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("note", DataType::Utf8, true),
    ];
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table("t", columns, vec!["id".to_string()], vec![])
        .unwrap();

    let mut writer = schema.batch_writer();
    let row_with_null_note = vec![CellValue::Int64(1), CellValue::Null];
    writer.insert("t", row_with_null_note).unwrap();
    assert_eq!(writer.pending_count(), 1);
}
3334
/// Exercises NULL handling on a table with a non-nullable `name` and a nullable `note`:
/// NULL in `name` is rejected, NULL in `note` is accepted, and a fully populated row is
/// accepted. Each scenario uses a fresh batch writer.
#[test]
fn non_nullable_column_rejects_null_in_batch_writer() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("name", DataType::Utf8, false),
        TableColumnConfig::new("note", DataType::Utf8, true),
    ];
    let schema = KvSchema::new(StoreClient::new("http://localhost:10000"))
        .table("t", columns, vec!["id".to_string()], vec![])
        .unwrap();

    // NULL in non-nullable column "name" must fail
    {
        let mut writer = schema.batch_writer();
        let row = vec![
            CellValue::Int64(1),
            CellValue::Null,
            CellValue::Utf8("ok".to_string()),
        ];
        let outcome = writer.insert("t", row);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err();
        assert!(
            message.contains("not nullable"),
            "error should mention non-nullable constraint"
        );
    }

    // NULL in nullable column "note" must succeed
    {
        let mut writer = schema.batch_writer();
        let row = vec![
            CellValue::Int64(1),
            CellValue::Utf8("Alice".to_string()),
            CellValue::Null,
        ];
        writer.insert("t", row).unwrap();
        assert_eq!(writer.pending_count(), 1);
    }

    // All non-null values must succeed
    {
        let mut writer = schema.batch_writer();
        let row = vec![
            CellValue::Int64(1),
            CellValue::Utf8("Alice".to_string()),
            CellValue::Utf8("hello".to_string()),
        ];
        writer.insert("t", row).unwrap();
        assert_eq!(writer.pending_count(), 1);
    }
}
3395
/// A table config whose primary key column is `UInt64` must be constructible.
#[test]
fn uint64_column_accepted() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::UInt64, false),
        TableColumnConfig::new("name", DataType::Utf8, false),
    ];
    let outcome = KvTableConfig::new(0, columns, vec!["id".to_string()], vec![]);
    assert!(outcome.is_ok());
}
3409
/// Encodes the value of a row keyed by `u64::MAX`, decodes it again together with the row's
/// primary key cells, and checks that both columns come back unchanged.
#[test]
fn uint64_primary_key_round_trip() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::UInt64, false),
        TableColumnConfig::new("label", DataType::Utf8, false),
    ];
    let config = KvTableConfig::new(0, columns, vec!["id".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let original = KvRow {
        values: vec![
            CellValue::UInt64(u64::MAX),
            CellValue::Utf8("max".to_string()),
        ],
    };
    let stored_value = encode_base_row_value(&original, &model).unwrap();
    let key_cells: Vec<_> = original
        .primary_key_values(&model)
        .into_iter()
        .cloned()
        .collect();

    let restored = decode_base_row(key_cells, &stored_value, &model).unwrap();
    assert!(matches!(&restored.values[0], CellValue::UInt64(v) if *v == u64::MAX));
    assert!(matches!(&restored.values[1], CellValue::Utf8(v) if v == "max"));
}
3439
/// A table config whose primary key column is `Utf8` must be constructible.
#[test]
fn string_primary_key_accepted() {
    let columns = vec![
        TableColumnConfig::new("code", DataType::Utf8, false),
        TableColumnConfig::new("value", DataType::Int64, false),
    ];
    let outcome = KvTableConfig::new(0, columns, vec!["code".to_string()], vec![]);
    assert!(outcome.is_ok());
}
3453
/// Encodes the value of a row keyed by a 32-byte `FixedSizeBinary` column, decodes it with
/// the row's primary key cells, and checks that the key column comes back unchanged.
#[test]
fn fixed_binary_primary_key_round_trip() {
    let columns = vec![
        TableColumnConfig::new("hash", DataType::FixedSizeBinary(32), false),
        TableColumnConfig::new("amount", DataType::Int64, false),
    ];
    let config = KvTableConfig::new(0, columns, vec!["hash".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let expected_hash = vec![0xABu8; 32];
    let original = KvRow {
        values: vec![
            CellValue::FixedBinary(expected_hash.clone()),
            CellValue::Int64(100),
        ],
    };
    let stored_value = encode_base_row_value(&original, &model).unwrap();
    let key_cells: Vec<_> = original
        .primary_key_values(&model)
        .into_iter()
        .cloned()
        .collect();

    let restored = decode_base_row(key_cells, &stored_value, &model).unwrap();
    assert!(matches!(&restored.values[0], CellValue::FixedBinary(v) if *v == expected_hash));
}
3483
/// For a `FixedSizeBinary(16)` primary key, encoding must fail for 10-byte and 20-byte
/// values with an error mentioning "requires exactly 16 bytes", and succeed for 16 bytes.
#[test]
fn fixed_binary_key_rejects_wrong_length() {
    let columns = vec![
        TableColumnConfig::new("hash", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("amount", DataType::Int64, false),
    ];
    let config = KvTableConfig::new(0, columns, vec!["hash".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    // Builds a row whose key is `len` copies of `fill`.
    let row_with_hash = |len: usize, fill: u8, amount: i64| KvRow {
        values: vec![
            CellValue::FixedBinary(vec![fill; len]),
            CellValue::Int64(amount),
        ],
    };

    // Too short (10 bytes for a 16-byte column)
    let short_row = row_with_hash(10, 0xAB, 1);
    let short_outcome = encode_primary_key_from_row(model.table_prefix, &short_row, &model);
    assert!(short_outcome.is_err());
    let short_message = short_outcome.unwrap_err();
    assert!(
        short_message.contains("requires exactly 16 bytes"),
        "should mention exact width requirement"
    );

    // Too long (20 bytes for a 16-byte column)
    let long_row = row_with_hash(20, 0xCD, 2);
    let long_outcome = encode_primary_key_from_row(model.table_prefix, &long_row, &model);
    assert!(long_outcome.is_err());
    let long_message = long_outcome.unwrap_err();
    assert!(long_message.contains("requires exactly 16 bytes"));

    // Exact length (16 bytes) — must succeed
    let exact_row = row_with_hash(16, 0xEF, 3);
    let exact_outcome = encode_primary_key_from_row(model.table_prefix, &exact_row, &model);
    assert!(exact_outcome.is_ok());
}
3523
/// For a secondary index over a `FixedSizeBinary(8)` column, index-key encoding must fail
/// for a 4-byte value with an error mentioning "requires exactly 8 bytes", and succeed for
/// an 8-byte value.
#[test]
fn fixed_binary_index_key_rejects_wrong_length() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("tag", DataType::FixedSizeBinary(8), false),
    ];
    let tag_index = IndexSpec::new("tag_idx", vec!["tag".to_string()]).unwrap();
    let config =
        KvTableConfig::new(0, columns, vec!["id".to_string()], vec![tag_index]).unwrap();
    let model = TableModel::from_config(&config).unwrap();
    let specs = model.resolve_index_specs(&config.index_specs).unwrap();
    let tag_spec = &specs[0];

    // Builds a row with id 1 whose tag is `len` copies of `fill`.
    let row_with_tag = |len: usize, fill: u8| KvRow {
        values: vec![
            CellValue::Int64(1),
            CellValue::FixedBinary(vec![fill; len]),
        ],
    };

    // Wrong length (4 bytes for an 8-byte column)
    let bad_row = row_with_tag(4, 0x01);
    let bad_outcome = encode_secondary_index_key(model.table_prefix, tag_spec, &model, &bad_row);
    assert!(bad_outcome.is_err());
    let message = bad_outcome.unwrap_err();
    assert!(message.contains("requires exactly 8 bytes"));

    // Correct length (8 bytes)
    let ok_row = row_with_tag(8, 0x02);
    let ok_outcome = encode_secondary_index_key(model.table_prefix, tag_spec, &model, &ok_row);
    assert!(ok_outcome.is_ok());
}
3553
/// Encodes and decodes a row with a non-key `Decimal256(76, 0)` column and checks that the
/// decimal value survives the round trip.
#[test]
fn decimal256_column_round_trip() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("balance", DataType::Decimal256(76, 0), false),
    ];
    let config = KvTableConfig::new(0, columns, vec!["id".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let balance = i256::from(123456789012345i64);
    let original = KvRow {
        values: vec![CellValue::Int64(1), CellValue::Decimal256(balance)],
    };
    let stored_value = encode_base_row_value(&original, &model).unwrap();
    let key_cells: Vec<_> = original
        .primary_key_values(&model)
        .into_iter()
        .cloned()
        .collect();

    let restored = decode_base_row(key_cells, &stored_value, &model).unwrap();
    assert!(matches!(&restored.values[1], CellValue::Decimal256(v) if *v == balance));
}
3580
/// A table config whose only primary key column is `Float64` must be rejected.
#[test]
fn float64_primary_key_rejected() {
    let id_column = TableColumnConfig::new("id", DataType::Float64, false);
    let outcome = KvTableConfig::new(0, vec![id_column], vec!["id".to_string()], vec![]);
    assert!(outcome.is_err());
}
3591
/// Checks two properties of the ordered `i256` encoding over a list of ascending samples:
/// decoding an encoded value yields the original, and the encoded byte arrays compare in
/// the same ascending order as the samples.
#[test]
fn i256_ordered_encoding_round_trip() {
    // Samples are listed in strictly ascending numeric order.
    let samples = [
        i256::from_i128(i128::MIN),
        i256::from(-1i64),
        i256::from(0i64),
        i256::from(1i64),
        i256::from_i128(i128::MAX),
    ];

    for sample in samples.iter().copied() {
        let bytes = encode_i256_ordered(sample);
        assert_eq!(decode_i256_ordered(bytes), sample);
    }

    let encoded: Vec<[u8; 32]> = samples
        .iter()
        .map(|sample| encode_i256_ordered(*sample))
        .collect();
    // Every adjacent pair must be strictly increasing byte-wise.
    for pair in encoded.windows(2) {
        assert!(pair[0] < pair[1]);
    }
}
3609
/// Encodes a `UInt64` primary key under table prefix 5 and checks that decoding the key
/// returns the same value.
#[test]
fn uint64_primary_key_encode_decode() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::UInt64, false),
        TableColumnConfig::new("name", DataType::Utf8, false),
    ];
    let config = KvTableConfig::new(5, columns, vec!["id".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let id_cell = CellValue::UInt64(12345);
    let encoded_key = encode_primary_key(5, &[&id_cell], &model).expect("pk key encodes");
    let key_cells = decode_primary_key(5, &encoded_key, &model).unwrap();
    assert!(matches!(&key_cells[0], CellValue::UInt64(12345)));
}
3628
/// Encodes a `Utf8` primary key under table prefix 3 and checks that decoding the key
/// returns the same string.
#[test]
fn utf8_primary_key_encode_decode() {
    let columns = vec![
        TableColumnConfig::new("code", DataType::Utf8, false),
        TableColumnConfig::new("val", DataType::Int64, false),
    ];
    let config = KvTableConfig::new(3, columns, vec!["code".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let code_cell = CellValue::Utf8("HELLO".to_string());
    let encoded_key = encode_primary_key(3, &[&code_cell], &model).expect("pk key encodes");
    let key_cells = decode_primary_key(3, &encoded_key, &model).unwrap();
    assert!(matches!(&key_cells[0], CellValue::Utf8(v) if v == "HELLO"));
}
3647
/// Encodes a 16-byte `FixedSizeBinary` primary key under table prefix 7 and checks that
/// decoding the key returns the same bytes.
#[test]
fn fixed_binary_primary_key_encode_decode() {
    let columns = vec![
        TableColumnConfig::new("hash", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("val", DataType::Int64, false),
    ];
    let config = KvTableConfig::new(7, columns, vec!["hash".to_string()], vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    // 0xDEADBEEF followed by twelve zero bytes.
    let mut expected_bytes = vec![0u8; 16];
    expected_bytes[..4].copy_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF]);

    let hash_cell = CellValue::FixedBinary(expected_bytes.clone());
    let encoded_key = encode_primary_key(7, &[&hash_cell], &model).expect("pk key encodes");
    let key_cells = decode_primary_key(7, &encoded_key, &model).unwrap();
    assert!(matches!(&key_cells[0], CellValue::FixedBinary(v) if *v == expected_bytes));
}
3667
/// Encodes and decodes a secondary index key over a `UInt64` column and checks that both the
/// indexed value and the row's primary key are recovered from the key.
#[test]
fn secondary_index_with_uint64_column() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("counter", DataType::UInt64, false),
    ];
    let counter_index = IndexSpec::new("counter_idx", vec!["counter".to_string()]).unwrap();
    let config =
        KvTableConfig::new(0, columns, vec!["id".to_string()], vec![counter_index]).unwrap();
    let model = TableModel::from_config(&config).unwrap();
    let specs = model.resolve_index_specs(&config.index_specs).unwrap();
    let counter_spec = &specs[0];

    let row = KvRow {
        values: vec![CellValue::Int64(1), CellValue::UInt64(999)],
    };
    let index_key =
        encode_secondary_index_key(model.table_prefix, counter_spec, &model, &row).unwrap();
    let decoded =
        decode_secondary_index_key(model.table_prefix, counter_spec, &model, &index_key).unwrap();

    // Decoded index values are looked up by the column's position in the model.
    let counter_position = model.columns_by_name.get("counter").copied().unwrap();
    let decoded_counter = decoded.values.get(&counter_position);
    assert!(matches!(decoded_counter, Some(CellValue::UInt64(999))));
    assert!(matches!(
        &decoded.primary_key_values[0],
        CellValue::Int64(1)
    ));
}
3698
/// Encodes and decodes a secondary index key over a `Decimal256(76, 0)` column and checks
/// that the indexed value is recovered from the key.
#[test]
fn secondary_index_with_decimal256_column() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("big_val", DataType::Decimal256(76, 0), false),
    ];
    let big_index = IndexSpec::new("big_idx", vec!["big_val".to_string()]).unwrap();
    let config = KvTableConfig::new(0, columns, vec!["id".to_string()], vec![big_index]).unwrap();
    let model = TableModel::from_config(&config).unwrap();
    let specs = model.resolve_index_specs(&config.index_specs).unwrap();
    let big_spec = &specs[0];

    let expected = i256::from(42i64);
    let row = KvRow {
        values: vec![CellValue::Int64(1), CellValue::Decimal256(expected)],
    };
    let index_key = encode_secondary_index_key(model.table_prefix, big_spec, &model, &row).unwrap();
    let decoded =
        decode_secondary_index_key(model.table_prefix, big_spec, &model, &index_key).unwrap();

    // Decoded index values are looked up by the column's position in the model.
    let big_position = model.columns_by_name.get("big_val").copied().unwrap();
    let decoded_value = decoded.values.get(&big_position);
    assert!(matches!(
        decoded_value,
        Some(CellValue::Decimal256(v)) if *v == expected
    ));
}
3726
3727    // -----------------------------------------------------------------------
3728    // Composite primary key tests
3729    // -----------------------------------------------------------------------
3730
/// A two-column primary key (`FixedSizeBinary(32)` + `UInt64`) must be accepted, and the
/// config must keep the key columns in the order they were given.
#[test]
fn composite_pk_config_accepted() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(32), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let key_columns = vec!["entity".to_string(), "version".to_string()];

    let outcome = KvTableConfig::new(0, columns, key_columns, vec![]);
    assert!(outcome.is_ok());
    let accepted = outcome.unwrap();
    assert_eq!(accepted.primary_key_columns, vec!["entity", "version"]);
}
3747
/// A composite primary key that includes a `Float64` column must be rejected when the table
/// config is built, with an error containing "must be".
#[test]
fn composite_pk_rejects_unsupported_type() {
    let result = KvTableConfig::new(
        0,
        vec![
            TableColumnConfig::new("entity", DataType::FixedSizeBinary(32), false),
            TableColumnConfig::new("score", DataType::Float64, false),
        ],
        vec!["entity".to_string(), "score".to_string()],
        vec![],
    );
    assert!(result.is_err());
    let err = result.unwrap_err();
    // A separate check for "must be Int64" would be redundant: any message containing it
    // also contains "must be", so the single substring test accepts exactly the same errors.
    assert!(
        err.contains("must be"),
        "expected PK type error, got: {err}"
    );
}
3766
/// Despite its name, this test expects a 60-byte binary column plus a `UInt64` column to be
/// accepted as a composite primary key, and checks that the resulting model reports a
/// primary key width of 68.
#[test]
fn composite_pk_rejects_too_wide() {
    let columns = vec![
        TableColumnConfig::new("big", DataType::FixedSizeBinary(60), false),
        TableColumnConfig::new("ver", DataType::UInt64, false),
    ];
    let key_columns = vec!["big".to_string(), "ver".to_string()];

    let config = KvTableConfig::new(0, columns, key_columns, vec![])
        .expect("variable-length keys should allow wider composite PKs");
    let model = TableModel::from_config(&config).expect("model");

    // 60 bytes for `big` plus 8 bytes for `ver`.
    assert_eq!(model.primary_key_width, 68);
}
3782
/// Encodes a two-part primary key (32-byte entity + `UInt64` version) under table prefix 1
/// and checks that decoding returns both parts in order.
#[test]
fn composite_pk_encode_decode_round_trip() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(32), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("title", DataType::Utf8, true),
    ];
    let key_columns = vec!["entity".to_string(), "version".to_string()];
    let config = KvTableConfig::new(1, columns, key_columns, vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let entity_bytes = vec![0xAA; 32];
    let entity_cell = CellValue::FixedBinary(entity_bytes.clone());
    let version_cell = CellValue::UInt64(42);
    let encoded_key =
        encode_primary_key(1, &[&entity_cell, &version_cell], &model).expect("pk key encodes");

    let key_cells = decode_primary_key(1, &encoded_key, &model).unwrap();
    assert_eq!(key_cells.len(), 2);
    assert!(matches!(&key_cells[0], CellValue::FixedBinary(v) if *v == entity_bytes));
    assert!(matches!(&key_cells[1], CellValue::UInt64(42)));
}
3809
/// For a fixed entity, encoded composite keys for versions 1, 10 and 100 must compare in
/// that same ascending order.
#[test]
fn composite_pk_version_sort_order() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(32), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
    ];
    let key_columns = vec!["entity".to_string(), "version".to_string()];
    let config = KvTableConfig::new(0, columns, key_columns, vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let entity_cell = CellValue::FixedBinary(vec![0xBB; 32]);
    // Encodes the key for the shared entity at the given version.
    let key_for_version = |version: u64| {
        encode_primary_key(0, &[&entity_cell, &CellValue::UInt64(version)], &model)
            .expect("pk key encodes")
    };

    let key_v1 = key_for_version(1);
    let key_v10 = key_for_version(10);
    let key_v100 = key_for_version(100);

    // Versions must sort numerically (big-endian U64)
    assert!(key_v1 < key_v10);
    assert!(key_v10 < key_v100);
}
3838
/// Encodes the value of a row with a two-column primary key, then decodes it with the key
/// cells supplied separately and checks that all three columns are present in the result.
#[test]
fn composite_pk_value_excludes_all_pk_columns() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let key_columns = vec!["entity".to_string(), "version".to_string()];
    let config = KvTableConfig::new(0, columns, key_columns, vec![]).unwrap();
    let model = TableModel::from_config(&config).unwrap();

    let original = KvRow {
        values: vec![
            CellValue::FixedBinary(vec![0xCC; 16]),
            CellValue::UInt64(7),
            CellValue::Utf8("hello".to_string()),
        ],
    };
    let stored_value = encode_base_row_value(&original, &model).unwrap();

    // NOTE(review): the intent is presumably that both PK columns are absent (None) in the
    // stored value; this test only verifies the decoded row, not the stored bytes.
    let key_cells = vec![CellValue::FixedBinary(vec![0xCC; 16]), CellValue::UInt64(7)];
    let restored = decode_base_row(key_cells, &stored_value, &model).unwrap();

    assert!(matches!(&restored.values[0], CellValue::FixedBinary(v) if v.len() == 16));
    assert!(matches!(&restored.values[1], CellValue::UInt64(7)));
    assert!(matches!(&restored.values[2], CellValue::Utf8(v) if v == "hello"));
}
3873
/// With a two-column primary key and an index on `tag`, decoding an encoded secondary index
/// key must yield both primary key parts as well as the indexed `tag` value.
#[test]
fn composite_pk_secondary_index_appends_all_pk_columns() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("tag", DataType::Int64, false),
    ];
    let key_columns = vec!["entity".to_string(), "version".to_string()];
    let tag_index = IndexSpec::new("tag_idx", vec!["tag".to_string()]).unwrap();
    let config = KvTableConfig::new(0, columns, key_columns, vec![tag_index]).unwrap();
    let model = TableModel::from_config(&config).unwrap();
    let specs = model.resolve_index_specs(&config.index_specs).unwrap();
    let tag_spec = &specs[0];

    let entity_bytes = vec![0xDD; 16];
    let row = KvRow {
        values: vec![
            CellValue::FixedBinary(entity_bytes.clone()),
            CellValue::UInt64(99),
            CellValue::Int64(42),
        ],
    };
    let index_key = encode_secondary_index_key(model.table_prefix, tag_spec, &model, &row).unwrap();
    let decoded =
        decode_secondary_index_key(model.table_prefix, tag_spec, &model, &index_key).unwrap();

    // Both primary key parts must be recoverable from the index key.
    let key_cells = &decoded.primary_key_values;
    assert_eq!(key_cells.len(), 2);
    assert!(matches!(
        &key_cells[0],
        CellValue::FixedBinary(v) if *v == entity_bytes
    ));
    assert!(matches!(&key_cells[1], CellValue::UInt64(99)));

    // The indexed column is looked up by its position in the model.
    let tag_position = model.columns_by_name.get("tag").copied().unwrap();
    let decoded_tag = decoded.values.get(&tag_position);
    assert!(matches!(decoded_tag, Some(CellValue::Int64(42))));
}
3917
/// Registers a `documents` table through `KvSchema::table_versioned`, passing
/// `doc_id` and `version` as the two column-name arguments, and checks that
/// the schema then reports exactly one table.
#[test]
fn table_versioned_convenience() {
    let store = StoreClient::new("http://localhost:10000");
    let document_columns = vec![
        TableColumnConfig::new("doc_id", DataType::FixedSizeBinary(32), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("title", DataType::Utf8, false),
    ];

    let registered = KvSchema::new(store)
        .table_versioned(
            "documents",
            document_columns,
            "doc_id",
            "version",
            Vec::new(),
        )
        .unwrap();

    assert_eq!(registered.table_count(), 1);
}
3936
/// Checks that a table with a single `Int64` primary-key column yields one
/// key index at position 0 with a key width of 8, and that a key built from
/// the value 42 decodes back to that value.
#[test]
fn single_column_pk_backward_compat() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("name", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    // Layout expectations for the lone key column.
    let pk_indices = &table_model.primary_key_indices;
    assert_eq!(pk_indices.len(), 1);
    assert_eq!(pk_indices[0], 0);
    assert_eq!(table_model.primary_key_width, 8);

    // Encode/decode round trip of a single key value.
    let id_value = CellValue::Int64(42);
    let encoded = encode_primary_key(0, &[&id_value], &table_model).expect("pk key encodes");
    let round_tripped = decode_primary_key(0, &encoded, &table_model).unwrap();
    assert_eq!(round_tripped.len(), 1);
    assert!(matches!(&round_tripped[0], CellValue::Int64(42)));
}
3961
/// Regression test for `encode_primary_key_bound` with a partial key prefix.
///
/// With only `entity` supplied for an `(entity, version)` key, the upper bound
/// must be 0xFF from the end of the encoded prefix onwards (not merely from the
/// end of the full key width), and the lower bound must leave the `version`
/// bytes at 0x00.
#[test]
fn partial_prefix_upper_bound_fills_trailing_pk_bytes() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
    ];
    let primary_key = vec![String::from("entity"), String::from("version")];
    let table_config = KvTableConfig::new(0, columns, primary_key, Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    assert_eq!(table_model.primary_key_width, 24); // 16 (entity) + 8 (version)

    // Only the leading key column is provided; `version` is left open.
    let entity_value = CellValue::FixedBinary(vec![0xAA; 16]);
    let prefix = [&entity_value];

    // --- Upper bound -------------------------------------------------------
    let upper_bound =
        encode_primary_key_bound(0, &prefix, &table_model, true).expect("pk bound encodes");

    // The supplied entity bytes appear verbatim.
    let upper_entity = primary_payload(&table_model, &upper_bound, 0, 16);
    assert_eq!(upper_entity, vec![0xAA; 16]);

    // The 8 version bytes that follow must be saturated, not zero.
    let upper_version = primary_payload(&table_model, &upper_bound, 16, 8);
    assert_eq!(
        upper_version,
        vec![0xFF; 8],
        "trailing PK column (version) must be 0xFF for upper bound"
    );

    // The rest of the payload beyond the key region is saturated as well.
    let tail_len = table_model.primary_key_codec.payload_capacity_bytes() - 24;
    let upper_tail = primary_payload(&table_model, &upper_bound, 24, tail_len);
    assert!(upper_tail.iter().all(|byte| *byte == 0xFF));

    // --- Lower bound -------------------------------------------------------
    let lower_bound =
        encode_primary_key_bound(0, &prefix, &table_model, false).expect("pk bound encodes");

    let lower_entity = primary_payload(&table_model, &lower_bound, 0, 16);
    assert_eq!(lower_entity, vec![0xAA; 16]);

    let lower_version = primary_payload(&table_model, &lower_bound, 16, 8);
    assert_eq!(
        lower_version,
        vec![0x00; 8],
        "trailing PK column (version) must be 0x00 for lower bound"
    );
}
4014
4015    // -----------------------------------------------------------------------
4016    // Composite PK filter pushdown tests
4017    // -----------------------------------------------------------------------
4018
/// Key layout: `(entity: FixedSizeBinary(16), version: UInt64)`.
/// Predicate: `entity = X'CC..CC' AND version <= 42`.
///
/// The predicate must turn into a single, tight primary-key range whose start
/// is `(CC..CC, 0)` and whose end is `(CC..CC, 42)` followed by 0xFF padding,
/// rather than a scan over the whole table prefix.
#[test]
fn composite_pk_range_pushdown_entity_eq_version_lte() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let primary_key = vec![String::from("entity"), String::from("version")];
    let table_config = KvTableConfig::new(0, columns, primary_key, Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    // Hand-built equivalent of `entity = X'CC..CC' AND version <= 42`.
    let mut predicate = QueryPredicate::default();
    predicate
        .constraints
        .insert(0, PredicateConstraint::FixedBinaryEq(vec![0xCC; 16]));
    predicate.constraints.insert(
        1,
        PredicateConstraint::UInt64Range {
            min: None,
            max: Some(42),
        },
    );

    let key_ranges = predicate.primary_key_ranges(&table_model).unwrap();
    assert_eq!(key_ranges.len(), 1, "should produce exactly one range");
    let tight = &key_ranges[0];

    let entity_value = CellValue::FixedBinary(vec![0xCC; 16]);
    let pk_width = table_model.primary_key_width;

    // Lower edge: the entity paired with the smallest version.
    let lowest_version = CellValue::UInt64(0);
    let want_start = encode_primary_key(0, &[&entity_value, &lowest_version], &table_model)
        .expect("pk key encodes");
    assert_eq!(
        tight.start, want_start,
        "start should be entity=CC..CC, version=0"
    );

    // Upper edge: the key portion equals the encoding of (entity, 42) ...
    let highest_version = CellValue::UInt64(42);
    let want_end_prefix = encode_primary_key(0, &[&entity_value, &highest_version], &table_model)
        .expect("pk key encodes");
    let actual_end_pk = primary_payload(&table_model, &tight.end, 0, pk_width);
    let want_end_pk = primary_payload(&table_model, &want_end_prefix, 0, pk_width);
    assert_eq!(
        actual_end_pk, want_end_pk,
        "end prefix should be entity=CC..CC, version=42"
    );

    // ... and everything after the key portion is 0xFF.
    let tail_len = table_model.primary_key_codec.payload_capacity_bytes() - pk_width;
    let end_tail = primary_payload(&table_model, &tight.end, pk_width, tail_len);
    assert!(
        end_tail.iter().all(|byte| *byte == 0xFF),
        "end trailing bytes should be 0xFF"
    );

    // The range must differ from the range covering the whole table prefix.
    let whole_table = primary_key_prefix_range(0);
    assert_ne!(
        tight.start, whole_table.start,
        "range must not be a full table scan"
    );
}
4105
/// Key layout: `(entity: FixedSizeBinary(16), version: UInt64)`.
/// Predicate: `entity = X'DD..DD'` with no constraint on `version`.
///
/// The result must still be one range whose start and end both begin with the
/// entity bytes, with the end padded by 0xFF after them.
#[test]
fn composite_pk_range_pushdown_entity_eq_only() {
    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
    ];
    let primary_key = vec![String::from("entity"), String::from("version")];
    let table_config = KvTableConfig::new(0, columns, primary_key, Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let mut predicate = QueryPredicate::default();
    predicate
        .constraints
        .insert(0, PredicateConstraint::FixedBinaryEq(vec![0xDD; 16]));

    let key_ranges = predicate.primary_key_ranges(&table_model).unwrap();
    assert_eq!(key_ranges.len(), 1);
    let entity_range = &key_ranges[0];

    // The start key opens with the entity bytes.
    let start_entity = primary_payload(&table_model, &entity_range.start, 0, 16);
    assert_eq!(start_entity, vec![0xDD; 16]);

    // The end key opens with the same entity bytes ...
    let end_entity = primary_payload(&table_model, &entity_range.end, 0, 16);
    assert_eq!(end_entity, vec![0xDD; 16]);

    // ... and is saturated across the version bytes and the remaining payload.
    let rest_len = table_model.primary_key_codec.payload_capacity_bytes() - 16;
    let end_rest = primary_payload(&table_model, &entity_range.end, 16, rest_len);
    assert!(
        end_rest.iter().all(|byte| *byte == 0xFF),
        "after entity bytes, everything should be 0xFF"
    );
}
4147
/// Builds the filter `entity = X'AA..AA'` as a DataFusion expression and
/// checks that it is reported as supported and is extracted as a
/// `FixedBinaryEq` constraint on column 0.
#[test]
fn fixed_binary_eq_constraint_extracted() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("version", DataType::UInt64, false),
    ];
    let primary_key = vec![String::from("entity"), String::from("version")];
    let table_config = KvTableConfig::new(0, columns, primary_key, Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    // Equality expression: entity = X'AA..AA'
    let rhs = ScalarValue::FixedSizeBinary(16, Some(vec![0xAA; 16]));
    let equality = col("entity").eq(Expr::Literal(rhs, None));

    let supported = QueryPredicate::supports_filter(&equality, &table_model);
    assert!(supported, "FixedSizeBinary equality should be supported");

    let predicate = QueryPredicate::from_filters(&[equality], &table_model);
    let extracted = matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::FixedBinaryEq(bytes)) if *bytes == vec![0xAA; 16]
    );
    assert!(extracted, "should extract FixedBinaryEq constraint");
}
4182
/// Builds the filter `version <= 42` over a `UInt64` key column and checks
/// that it is supported and extracted as `UInt64Range { min: None, max: Some(42) }`
/// on column 0.
#[test]
fn uint64_range_constraint_extracted() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let upper_limit = Expr::Literal(ScalarValue::UInt64(Some(42)), None);
    let at_most = col("version").lt_eq(upper_limit);

    let supported = QueryPredicate::supports_filter(&at_most, &table_model);
    assert!(supported, "UInt64 range should be supported");

    let predicate = QueryPredicate::from_filters(&[at_most], &table_model);
    let extracted = matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::UInt64Range {
            min: None,
            max: Some(42)
        })
    );
    assert!(extracted, "should extract UInt64Range with max=42");
}
4217
/// Uses a lower bound of `2^63 + 5` (which does not fit in an `i64`) in the
/// filter `version >= threshold` and checks that the extracted `UInt64Range`
/// keeps that exact value as its minimum.
#[test]
fn uint64_range_constraint_supports_values_above_i64_max() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let threshold = (1u64 << 63) + 5;
    let lower_limit = Expr::Literal(ScalarValue::UInt64(Some(threshold)), None);
    let at_least = col("version").gt_eq(lower_limit);

    assert!(QueryPredicate::supports_filter(&at_least, &table_model));

    let predicate = QueryPredicate::from_filters(&[at_least], &table_model);
    assert!(matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::UInt64Range {
            min: Some(lower),
            max: None
        }) if *lower == threshold
    ));
}
4248
/// Compares a `UInt64` column against the `Int64` literal `-1` and checks that
/// the filter is reported as unsupported, does not set the predicate's
/// `contradiction` flag, and leaves the constraint map empty.
#[test]
fn unsupported_uint64_comparison_does_not_force_contradiction() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let negative_one = Expr::Literal(ScalarValue::Int64(Some(-1)), None);
    let above_negative = col("version").gt(negative_one);

    let supported = QueryPredicate::supports_filter(&above_negative, &table_model);
    assert!(
        !supported,
        "negative Int64 literal on UInt64 column should not be pushdown-supported"
    );

    let predicate = QueryPredicate::from_filters(&[above_negative], &table_model);
    assert!(
        !predicate.contradiction,
        "unsupported filter must not collapse scan to empty result"
    );
    assert!(
        predicate.constraints.is_empty(),
        "unsupported filter must not contribute pushed constraints"
    );
}
4281
/// ANDs a supported filter (`version >= 10`) with an unsupported one
/// (`version > -1` using an `Int64` literal) and checks that the conjunction
/// as a whole is not reported as supported, while the supported half is still
/// extracted as `UInt64Range { min: Some(10), max: None }`.
#[test]
fn unsupported_uint64_comparison_in_and_keeps_supported_sibling() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let at_least_ten = col("version").gt_eq(Expr::Literal(ScalarValue::UInt64(Some(10)), None));
    let above_negative = col("version").gt(Expr::Literal(ScalarValue::Int64(Some(-1)), None));
    let conjunction = at_least_ten.and(above_negative);

    let fully_supported = QueryPredicate::supports_filter(&conjunction, &table_model);
    assert!(
        !fully_supported,
        "mixed AND should not be marked fully pushdown-supported"
    );

    let predicate = QueryPredicate::from_filters(&[conjunction], &table_model);
    assert!(!predicate.contradiction);
    assert!(matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::UInt64Range {
            min: Some(10),
            max: None
        })
    ));
}
4316
/// Builds `version IN (1, 5, 10)` over a `UInt64` key column and checks that
/// it is supported and extracted as a `UInt64In` constraint holding three
/// values.
#[test]
fn uint64_in_list_pushdown() {
    use datafusion::logical_expr::{col, in_list};

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    // One literal expression per list member, in the order 1, 5, 10.
    let members: Vec<Expr> = [1u64, 5, 10]
        .iter()
        .map(|&member| Expr::Literal(ScalarValue::UInt64(Some(member)), None))
        .collect();
    let membership = in_list(col("version"), members, false);

    assert!(QueryPredicate::supports_filter(&membership, &table_model));

    let predicate = QueryPredicate::from_filters(&[membership], &table_model);
    let extracted = matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::UInt64In(values)) if values.len() == 3
    );
    assert!(extracted, "should extract UInt64In with 3 values");
}
4349
/// Builds `version IN (1, 2^63)` and checks that the extracted `UInt64In`
/// constraint has two values and contains `2^63`, a value that does not fit
/// in an `i64`.
#[test]
fn uint64_in_list_pushdown_supports_values_above_i64_max() {
    use datafusion::logical_expr::{col, in_list};

    let columns = vec![
        TableColumnConfig::new("version", DataType::UInt64, false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let huge = 1u64 << 63;
    let members: Vec<Expr> = [1u64, huge]
        .iter()
        .map(|&member| Expr::Literal(ScalarValue::UInt64(Some(member)), None))
        .collect();
    let membership = in_list(col("version"), members, false);

    assert!(QueryPredicate::supports_filter(&membership, &table_model));

    let predicate = QueryPredicate::from_filters(&[membership], &table_model);
    assert!(matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::UInt64In(values)) if values.contains(&huge) && values.len() == 2
    ));
}
4382
/// Builds `entity IN (X'AA..AA', X'BB..BB')` over a `FixedSizeBinary(16)` key
/// column and checks that it is extracted as a two-value `FixedBinaryIn`
/// constraint which expands into two primary-key ranges.
#[test]
fn fixed_binary_in_list_pushdown() {
    use datafusion::logical_expr::{col, in_list};

    let columns = vec![
        TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
        TableColumnConfig::new("data", DataType::Utf8, true),
    ];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("entity")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    // Two 16-byte literals, filled with 0xAA and 0xBB respectively.
    let members: Vec<Expr> = [0xAA_u8, 0xBB]
        .iter()
        .map(|&fill| {
            Expr::Literal(
                ScalarValue::FixedSizeBinary(16, Some(vec![fill; 16])),
                None,
            )
        })
        .collect();
    let membership = in_list(col("entity"), members, false);

    assert!(QueryPredicate::supports_filter(&membership, &table_model));

    let predicate = QueryPredicate::from_filters(&[membership], &table_model);
    let extracted = matches!(
        predicate.constraints.get(&0),
        Some(PredicateConstraint::FixedBinaryIn(values)) if values.len() == 2
    );
    assert!(extracted, "should extract FixedBinaryIn with 2 values");

    // Range generation yields one range per listed entity.
    let key_ranges = predicate.primary_key_ranges(&table_model).unwrap();
    assert_eq!(key_ranges.len(), 2, "should produce one range per entity");
}
4421
/// Builds `big_val >= 100` over an indexed `Decimal256(76, 0)` column and
/// checks that it is supported, extracted as a `Decimal256Range` with a
/// minimum and no maximum, and that the constraint accepts 200 but rejects 50.
#[test]
fn decimal256_range_pushdown() {
    use datafusion::logical_expr::col;

    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("big_val", DataType::Decimal256(76, 0), false),
    ];
    let indexes = vec![IndexSpec::new("big_idx", vec![String::from("big_val")]).unwrap()];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], indexes).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let hundred = ScalarValue::Decimal256(Some(i256::from(100i64)), 76, 0);
    let at_least = col("big_val").gt_eq(Expr::Literal(hundred, None));

    let supported = QueryPredicate::supports_filter(&at_least, &table_model);
    assert!(supported, "Decimal256 range should be supported");

    let predicate = QueryPredicate::from_filters(&[at_least], &table_model);
    let big_val_column = *table_model.columns_by_name.get("big_val").unwrap();
    let extracted = matches!(
        predicate.constraints.get(&big_val_column),
        Some(PredicateConstraint::Decimal256Range {
            min: Some(_),
            max: None
        })
    );
    assert!(
        extracted,
        "should extract Decimal256Range with min=100, no max"
    );

    // Evaluate the extracted constraint against one value on each side of 100.
    let range_constraint = predicate.constraints.get(&big_val_column).unwrap();
    let inside = CellValue::Decimal256(i256::from(200i64));
    let outside = CellValue::Decimal256(i256::from(50i64));
    assert!(matches_constraint(&inside, range_constraint));
    assert!(!matches_constraint(&outside, range_constraint));
}
4467
/// Checks `matches_constraint` against the `UInt64` value `2^63`: it must
/// satisfy a `min: 1` range and must not be treated as a member of the list
/// `[1, 2, 3]`.
#[test]
fn uint64_constraint_matching_does_not_wrap_large_values() {
    let high_bit = CellValue::UInt64(1u64 << 63);

    // Range with a lower bound of 1 and no upper bound.
    let at_least_one = PredicateConstraint::UInt64Range {
        min: Some(1),
        max: None,
    };
    assert!(matches_constraint(&high_bit, &at_least_one));
    assert!(!matches_constraint(&CellValue::UInt64(0), &at_least_one));

    // Membership in a small explicit list.
    let small_set = PredicateConstraint::UInt64In(vec![1, 2, 3]);
    assert!(matches_constraint(&CellValue::UInt64(2), &small_set));
    assert!(!matches_constraint(&high_bit, &small_set));
}
4484
/// Installs a `UInt64Range` whose minimum (10) exceeds its maximum (9) on the
/// key column and checks that no primary-key ranges are produced.
#[test]
fn uint64_empty_range_produces_no_pk_ranges() {
    let columns = vec![TableColumnConfig::new("version", DataType::UInt64, false)];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("version")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();

    let inverted = PredicateConstraint::UInt64Range {
        min: Some(10),
        max: Some(9),
    };
    let mut predicate = QueryPredicate::default();
    predicate.constraints.insert(0, inverted);

    let key_ranges = predicate.primary_key_ranges(&table_model).unwrap();
    assert!(key_ranges.is_empty());
}
4507
/// Round-trips two `Utf8` primary keys through encode/decode: one containing
/// the non-ASCII character U+00E9 and one 17-character ASCII string. Both
/// must encode and decode back to the original text.
#[test]
fn utf8_primary_key_encoding_supports_unicode_and_long_values() {
    let columns = vec![TableColumnConfig::new("id", DataType::Utf8, false)];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    let prefix = table_model.table_prefix;

    // Key text with an accented character.
    let accented_row = KvRow {
        values: vec![CellValue::Utf8("naive-cafe-e9".replace("e9", "\u{00E9}"))],
    };
    let accented_key = encode_primary_key_from_row(prefix, &accented_row, &table_model)
        .expect("non-ascii PK should encode");
    let accented_back = decode_primary_key(prefix, &accented_key, &table_model)
        .expect("non-ascii PK should decode");
    assert!(matches!(
        accented_back.as_slice(),
        [CellValue::Utf8(text)] if text == "naive-cafe-\u{00E9}"
    ));

    // Longer ASCII key text; this is expected to succeed as well.
    let long_row = KvRow {
        values: vec![CellValue::Utf8("abcdefghijklmnopq".to_string())],
    };
    let long_key = encode_primary_key_from_row(prefix, &long_row, &table_model)
        .expect("long UTF-8 PK should encode");
    let long_back = decode_primary_key(prefix, &long_key, &table_model)
        .expect("long UTF-8 PK should decode");
    assert!(matches!(
        long_back.as_slice(),
        [CellValue::Utf8(text)] if text == "abcdefghijklmnopq"
    ));
}
4543
/// Exercises the size limit of a `Utf8` primary key: a value of
/// `payload_capacity_bytes() - 1` characters must encode to a key of
/// `MAX_KEY_LEN` bytes and round-trip, while a value one character longer
/// must be rejected with the capacity error message.
#[test]
fn utf8_primary_key_encodes_at_max_codec_payload_and_rejects_overflow() {
    let columns = vec![TableColumnConfig::new("id", DataType::Utf8, false)];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    let prefix = table_model.table_prefix;

    let capacity = table_model.primary_key_codec.payload_capacity_bytes();
    let max_value = "a".repeat(capacity - 1);
    let overflow_value = "a".repeat(capacity);

    // Largest accepted value: fills the key completely and round-trips.
    let fitting_row = KvRow {
        values: vec![CellValue::Utf8(max_value.clone())],
    };
    let fitting_key = encode_primary_key_from_row(prefix, &fitting_row, &table_model)
        .expect("max-length UTF-8 PK should encode");
    assert_eq!(fitting_key.len(), exoware_sdk::keys::MAX_KEY_LEN);
    let round_tripped = decode_primary_key(prefix, &fitting_key, &table_model)
        .expect("max-length PK should decode");
    assert!(matches!(
        round_tripped.as_slice(),
        [CellValue::Utf8(text)] if text == &max_value
    ));

    // One character more must be refused.
    let oversized_row = KvRow {
        values: vec![CellValue::Utf8(overflow_value)],
    };
    let err = encode_primary_key_from_row(prefix, &oversized_row, &table_model)
        .expect_err("UTF-8 PK exceeding codec payload should be rejected");
    assert!(err.contains("primary key payload exceeds codec payload capacity 253 bytes"));
}
4584
/// Round-trips a `Utf8` primary key whose text contains an embedded NUL
/// character (`"AB\0CD"`) and checks that the decoded text is unchanged.
#[test]
fn utf8_primary_key_round_trips_embedded_nul() {
    let columns = vec![TableColumnConfig::new("id", DataType::Utf8, false)];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], Vec::new()).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    let prefix = table_model.table_prefix;

    let nul_row = KvRow {
        values: vec![CellValue::Utf8("AB\0CD".to_string())],
    };

    let encoded = encode_primary_key_from_row(prefix, &nul_row, &table_model)
        .expect("embedded NUL in key text must encode");
    let round_tripped =
        decode_primary_key(prefix, &encoded, &table_model).expect("embedded NUL must decode");

    assert!(matches!(
        round_tripped.as_slice(),
        [CellValue::Utf8(text)] if text == "AB\0CD"
    ));
}
4608
/// Round-trips a secondary-index key over a `Utf8` column whose value contains
/// an embedded NUL character (`"AB\0CD"`) and checks that the decoded indexed
/// value (column 1) is unchanged.
#[test]
fn utf8_index_key_round_trips_embedded_nul() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Int64, false),
        TableColumnConfig::new("tag", DataType::Utf8, false),
    ];
    let indexes = vec![IndexSpec::new("tag_idx", vec![String::from("tag")]).unwrap()];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], indexes).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    let resolved = table_model
        .resolve_index_specs(&table_config.index_specs)
        .unwrap();
    let tag_spec = &resolved[0];
    let prefix = table_model.table_prefix;

    let nul_row = KvRow {
        values: vec![CellValue::Int64(1), CellValue::Utf8("AB\0CD".to_string())],
    };

    let encoded = encode_secondary_index_key(prefix, tag_spec, &table_model, &nul_row)
        .expect("embedded NUL in index key text must encode");
    let round_tripped = decode_secondary_index_key(prefix, tag_spec, &table_model, &encoded)
        .expect("embedded NUL index key must decode");

    assert!(matches!(
        round_tripped.values.get(&1),
        Some(CellValue::Utf8(text)) if text == "AB\0CD"
    ));
}
4636
/// Exercises the size limit of a secondary-index key that embeds a `Utf8`
/// primary key: an `id` sized so the key reaches `MAX_KEY_LEN` must encode
/// and round-trip (both the indexed `tag` and the `id`), while an `id` one
/// character longer must be rejected with the capacity error message.
#[test]
fn secondary_index_with_long_utf8_primary_key_encodes_at_max_payload_and_rejects_overflow() {
    let columns = vec![
        TableColumnConfig::new("id", DataType::Utf8, false),
        TableColumnConfig::new("tag", DataType::Utf8, false),
    ];
    let indexes = vec![IndexSpec::new("tag_idx", vec![String::from("tag")]).unwrap()];
    let table_config =
        KvTableConfig::new(0, columns, vec![String::from("id")], indexes).unwrap();
    let table_model = TableModel::from_config(&table_config).unwrap();
    let resolved = table_model
        .resolve_index_specs(&table_config.index_specs)
        .unwrap();
    let tag_spec = &resolved[0];
    let prefix = table_model.table_prefix;

    // Size the id from the index codec's capacity minus the encoded tag.
    let capacity = tag_spec.codec.payload_capacity_bytes();
    let max_tag = "t".to_string();
    let encoded_tag_len = encode_string_variable(&max_tag).unwrap().len();
    let max_id = "i".repeat(capacity - encoded_tag_len - 1);
    let overflow_id = format!("{max_id}x");

    // Largest accepted id: the key reaches the maximum length and round-trips.
    let fitting_row = KvRow {
        values: vec![
            CellValue::Utf8(max_id.clone()),
            CellValue::Utf8(max_tag.clone()),
        ],
    };
    let fitting_key = encode_secondary_index_key(prefix, tag_spec, &table_model, &fitting_row)
        .expect("secondary key at max payload should encode");
    assert_eq!(fitting_key.len(), exoware_sdk::keys::MAX_KEY_LEN);

    let round_tripped =
        decode_secondary_index_key(prefix, tag_spec, &table_model, &fitting_key).expect("decode");
    assert!(matches!(
        round_tripped.values.get(&1),
        Some(CellValue::Utf8(text)) if text == &max_tag
    ));
    assert!(matches!(
        round_tripped.primary_key_values.as_slice(),
        [CellValue::Utf8(text)] if text == &max_id
    ));

    // One character more in the id must be refused.
    let oversized_row = KvRow {
        values: vec![CellValue::Utf8(overflow_id), CellValue::Utf8(max_tag)],
    };
    let err = encode_secondary_index_key(prefix, tag_spec, &table_model, &oversized_row)
        .expect_err("secondary key exceeding max payload should be rejected");
    assert!(err.contains("index 'tag_idx' payload exceeds codec payload capacity 252 bytes"));
}
4692
4693    #[test]
4694    fn secondary_index_from_parts_with_long_utf8_primary_key_rejects_overflow() {
4695        let config = KvTableConfig::new(
4696            0,
4697            vec![
4698                TableColumnConfig::new("id", DataType::Utf8, false),
4699                TableColumnConfig::new("tag", DataType::Utf8, false),
4700            ],
4701            vec!["id".to_string()],
4702            vec![IndexSpec::new("tag_idx", vec!["tag".to_string()]).unwrap()],
4703        )
4704        .unwrap();
4705        let model = TableModel::from_config(&config).unwrap();
4706        let specs = model.resolve_index_specs(&config.index_specs).unwrap();
4707        let spec = &specs[0];
4708        let max_payload = spec.codec.payload_capacity_bytes();
4709        let max_tag = "t".to_string();
4710        let max_id = "i".repeat(max_payload - encode_string_variable(&max_tag).unwrap().len() - 1);
4711        let overflow_id = format!("{max_id}x");
4712        let max_row = KvRow {
4713            values: vec![
4714                CellValue::Utf8(max_id.clone()),
4715                CellValue::Utf8(max_tag.clone()),
4716            ],
4717        };
4718        let encoded_row = encode_base_row_value(&max_row, &model).expect("encode row");
4719        let archived = decode_stored_row(&encoded_row).expect("archive row");
4720
4721        let key = encode_secondary_index_key_from_parts(
4722            model.table_prefix,
4723            spec,
4724            &model,
4725            &[CellValue::Utf8(max_id.clone())],
4726            &archived,
4727        )
4728        .expect("backfill path should encode max payload");
4729        assert_eq!(key.len(), exoware_sdk::keys::MAX_KEY_LEN);
4730
4731        let err = encode_secondary_index_key_from_parts(
4732            model.table_prefix,
4733            spec,
4734            &model,
4735            &[CellValue::Utf8(overflow_id)],
4736            &archived,
4737        )
4738        .expect_err("backfill path overflow should be rejected");
4739        assert!(err
4740            .to_string()
4741            .contains("index 'tag_idx' payload exceeds codec payload capacity 252 bytes"));
4742    }
4743
4744    #[test]
4745    fn primary_key_type_mismatch_returns_error_instead_of_panicking() {
4746        let config = KvTableConfig::new(
4747            0,
4748            vec![TableColumnConfig::new("id", DataType::UInt64, false)],
4749            vec!["id".to_string()],
4750            vec![],
4751        )
4752        .unwrap();
4753        let model = TableModel::from_config(&config).unwrap();
4754        let row = KvRow {
4755            values: vec![CellValue::Int64(7)],
4756        };
4757
4758        let err = encode_primary_key_from_row(model.table_prefix, &row, &model)
4759            .expect_err("mismatched PK type should return an error");
4760        assert!(err.contains("type mismatch while encoding key value"));
4761    }
4762
4763    #[test]
4764    fn choose_index_plan_uses_fixed_binary_leading_constraint() {
4765        let config = KvTableConfig::new(
4766            0,
4767            vec![
4768                TableColumnConfig::new("id", DataType::Int64, false),
4769                TableColumnConfig::new("entity", DataType::FixedSizeBinary(16), false),
4770            ],
4771            vec!["id".to_string()],
4772            vec![IndexSpec::new("entity_idx", vec!["entity".to_string()]).unwrap()],
4773        )
4774        .unwrap();
4775        let model = TableModel::from_config(&config).unwrap();
4776        let specs = model.resolve_index_specs(&config.index_specs).unwrap();
4777
4778        use datafusion::logical_expr::col;
4779        let filter = col("entity").eq(Expr::Literal(
4780            ScalarValue::FixedSizeBinary(16, Some(vec![0xAB; 16])),
4781            None,
4782        ));
4783        let pred = QueryPredicate::from_filters(&[filter], &model);
4784        let plan = pred
4785            .choose_index_plan(&model, &specs)
4786            .unwrap()
4787            .expect("fixed-binary equality should choose an index");
4788
4789        assert_eq!(plan.constrained_prefix_len, 1);
4790        assert_eq!(plan.ranges.len(), 1);
4791        let range = &plan.ranges[0];
4792        assert_eq!(
4793            index_payload(&specs[0], &range.start, 0, 16),
4794            vec![0xAB; 16]
4795        );
4796        assert_eq!(index_payload(&specs[0], &range.end, 0, 16), vec![0xAB; 16]);
4797    }
4798
4799    #[test]
4800    fn choose_index_plan_uses_decimal256_leading_constraint() {
4801        let config = KvTableConfig::new(
4802            0,
4803            vec![
4804                TableColumnConfig::new("id", DataType::Int64, false),
4805                TableColumnConfig::new("big_val", DataType::Decimal256(76, 0), false),
4806            ],
4807            vec!["id".to_string()],
4808            vec![IndexSpec::new("big_idx", vec!["big_val".to_string()]).unwrap()],
4809        )
4810        .unwrap();
4811        let model = TableModel::from_config(&config).unwrap();
4812        let specs = model.resolve_index_specs(&config.index_specs).unwrap();
4813
4814        use datafusion::logical_expr::col;
4815        let filter = col("big_val").gt_eq(Expr::Literal(
4816            ScalarValue::Decimal256(Some(i256::from(100i64)), 76, 0),
4817            None,
4818        ));
4819        let pred = QueryPredicate::from_filters(&[filter], &model);
4820        let plan = pred
4821            .choose_index_plan(&model, &specs)
4822            .unwrap()
4823            .expect("decimal256 range should choose an index");
4824
4825        assert_eq!(plan.constrained_prefix_len, 1);
4826        assert_eq!(plan.ranges.len(), 1);
4827        let range = &plan.ranges[0];
4828        assert_eq!(
4829            index_payload(&specs[0], &range.start, 0, 32),
4830            encode_i256_ordered(i256::from(100i64)).to_vec()
4831        );
4832    }
4833
4834    #[tokio::test]
4835    async fn backfill_added_indexes_writes_entries_for_existing_rows() {
4836        let state = MockState {
4837            kv: Arc::new(Mutex::new(BTreeMap::new())),
4838            range_calls: Arc::new(AtomicUsize::new(0)),
4839            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
4840            sequence_number: Arc::new(AtomicU64::new(0)),
4841        };
4842        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
4843        let client = StoreClient::new(&base_url);
4844
4845        let seed_schema = KvSchema::new(client.clone())
4846            .table(
4847                "orders",
4848                vec![
4849                    TableColumnConfig::new("id", DataType::Int64, false),
4850                    TableColumnConfig::new("status", DataType::Utf8, false),
4851                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
4852                ],
4853                vec!["id".to_string()],
4854                vec![],
4855            )
4856            .expect("seed schema");
4857        let mut writer = seed_schema.batch_writer();
4858        for i in 0..6i64 {
4859            writer
4860                .insert(
4861                    "orders",
4862                    vec![
4863                        CellValue::Int64(i),
4864                        CellValue::Utf8(if i % 2 == 0 { "open" } else { "closed" }.to_string()),
4865                        CellValue::Int64(i * 10),
4866                    ],
4867                )
4868                .expect("seed row");
4869        }
4870        writer.flush().await.expect("seed flush");
4871
4872        {
4873            let guard = state.kv.lock().expect("kv mutex poisoned");
4874            let base_rows = guard
4875                .keys()
4876                .filter(|key| matches_primary_key(0, key))
4877                .count();
4878            let index_rows = guard
4879                .keys()
4880                .filter(|key| matches_secondary_index_key(0, 1, key))
4881                .count();
4882            assert_eq!(base_rows, 6);
4883            assert_eq!(index_rows, 0);
4884        }
4885
4886        let backfill_schema = KvSchema::new(client.clone())
4887            .table(
4888                "orders",
4889                vec![
4890                    TableColumnConfig::new("id", DataType::Int64, false),
4891                    TableColumnConfig::new("status", DataType::Utf8, false),
4892                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
4893                ],
4894                vec!["id".to_string()],
4895                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
4896                    .expect("valid index")
4897                    .with_cover_columns(vec!["amount_cents".to_string()])],
4898            )
4899            .expect("backfill schema");
4900        let report = backfill_schema
4901            .backfill_added_indexes_with_options(
4902                "orders",
4903                &[],
4904                IndexBackfillOptions {
4905                    row_batch_size: 2,
4906                    start_from_primary_key: None,
4907                },
4908            )
4909            .await
4910            .expect("backfill should succeed");
4911        assert_eq!(report.scanned_rows, 6);
4912        assert_eq!(report.indexes_backfilled, 1);
4913        assert_eq!(report.index_entries_written, 6);
4914
4915        {
4916            let guard = state.kv.lock().expect("kv mutex poisoned");
4917            let index_rows = guard
4918                .keys()
4919                .filter(|key| matches_secondary_index_key(0, 1, key))
4920                .count();
4921            assert_eq!(index_rows, 6);
4922            let (_, sample_value) = guard
4923                .iter()
4924                .find(|(key, _)| matches_secondary_index_key(0, 1, key))
4925                .expect("backfill should create index entry");
4926            let archived = decode_stored_row(sample_value.as_ref())
4927                .expect("covering value must be valid codec");
4928            assert_eq!(archived.values.len(), 3);
4929        }
4930
4931        let _ = shutdown_tx.send(());
4932    }
4933
4934    #[tokio::test]
4935    async fn backfill_added_indexes_writes_zorder_entries_for_existing_rows() {
4936        let state = MockState {
4937            kv: Arc::new(Mutex::new(BTreeMap::new())),
4938            range_calls: Arc::new(AtomicUsize::new(0)),
4939            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
4940            sequence_number: Arc::new(AtomicU64::new(0)),
4941        };
4942        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
4943        let client = StoreClient::new(&base_url);
4944
4945        let seed_schema = KvSchema::new(client.clone())
4946            .table(
4947                "points",
4948                vec![
4949                    TableColumnConfig::new("x", DataType::Int64, false),
4950                    TableColumnConfig::new("y", DataType::Int64, false),
4951                    TableColumnConfig::new("id", DataType::Int64, false),
4952                    TableColumnConfig::new("value", DataType::Int64, false),
4953                ],
4954                vec!["id".to_string()],
4955                vec![],
4956            )
4957            .expect("seed schema");
4958        let mut writer = seed_schema.batch_writer();
4959        for (x, y, id, value) in [(1, 1, 11, 110), (1, 2, 12, 120), (2, 1, 21, 210)] {
4960            writer
4961                .insert(
4962                    "points",
4963                    vec![
4964                        CellValue::Int64(x),
4965                        CellValue::Int64(y),
4966                        CellValue::Int64(id),
4967                        CellValue::Int64(value),
4968                    ],
4969                )
4970                .expect("seed row");
4971        }
4972        writer.flush().await.expect("seed flush");
4973
4974        let backfill_schema = KvSchema::new(client.clone())
4975            .table(
4976                "points",
4977                vec![
4978                    TableColumnConfig::new("x", DataType::Int64, false),
4979                    TableColumnConfig::new("y", DataType::Int64, false),
4980                    TableColumnConfig::new("id", DataType::Int64, false),
4981                    TableColumnConfig::new("value", DataType::Int64, false),
4982                ],
4983                vec!["id".to_string()],
4984                vec![
4985                    IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()])
4986                        .expect("valid index")
4987                        .with_cover_columns(vec!["value".to_string()]),
4988                ],
4989            )
4990            .expect("backfill schema");
4991        let report = backfill_schema
4992            .backfill_added_indexes_with_options(
4993                "points",
4994                &[],
4995                IndexBackfillOptions {
4996                    row_batch_size: 2,
4997                    start_from_primary_key: None,
4998                },
4999            )
5000            .await
5001            .expect("backfill should succeed");
5002        assert_eq!(report.scanned_rows, 3);
5003        assert_eq!(report.index_entries_written, 3);
5004
5005        let guard = state.kv.lock().expect("kv mutex poisoned");
5006        let index_entry = guard
5007            .keys()
5008            .find(|key| matches_secondary_index_key(0, 1, key))
5009            .cloned()
5010            .expect("z-order backfill should create index entry");
5011        let config = KvTableConfig::new(
5012            0,
5013            vec![
5014                TableColumnConfig::new("x", DataType::Int64, false),
5015                TableColumnConfig::new("y", DataType::Int64, false),
5016                TableColumnConfig::new("id", DataType::Int64, false),
5017                TableColumnConfig::new("value", DataType::Int64, false),
5018            ],
5019            vec!["id".to_string()],
5020            vec![
5021                IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()]).expect("valid"),
5022            ],
5023        )
5024        .expect("config");
5025        let model = TableModel::from_config(&config).expect("model");
5026        let spec = model
5027            .resolve_index_specs(&config.index_specs)
5028            .expect("specs")
5029            .remove(0);
5030        let decoded = decode_secondary_index_key(model.table_prefix, &spec, &model, &index_entry)
5031            .expect("decode z-order key");
5032        let x_idx = *model.columns_by_name.get("x").unwrap();
5033        let y_idx = *model.columns_by_name.get("y").unwrap();
5034        assert!(matches!(
5035            decoded.values.get(&x_idx),
5036            Some(CellValue::Int64(_))
5037        ));
5038        assert!(matches!(
5039            decoded.values.get(&y_idx),
5040            Some(CellValue::Int64(_))
5041        ));
5042
5043        let _ = shutdown_tx.send(());
5044    }
5045
5046    #[tokio::test]
5047    async fn backfill_added_indexes_requires_append_only_index_evolution() {
5048        let client = StoreClient::new("http://127.0.0.1:1");
5049        let schema = KvSchema::new(client)
5050            .table(
5051                "orders",
5052                vec![
5053                    TableColumnConfig::new("id", DataType::Int64, false),
5054                    TableColumnConfig::new("status", DataType::Utf8, false),
5055                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5056                ],
5057                vec!["id".to_string()],
5058                vec![
5059                    IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid"),
5060                    IndexSpec::new("amount_idx", vec!["amount_cents".to_string()]).expect("valid"),
5061                ],
5062            )
5063            .expect("schema");
5064
5065        let previous_specs =
5066            vec![IndexSpec::new("amount_idx", vec!["amount_cents".to_string()]).expect("valid")];
5067        let err = schema
5068            .backfill_added_indexes("orders", &previous_specs)
5069            .await
5070            .expect_err("non-append-only evolution should be rejected");
5071        assert!(err
5072            .to_string()
5073            .contains("index evolution must be append-only"));
5074    }
5075
5076    #[tokio::test]
5077    async fn backfill_added_indexes_is_noop_when_no_new_indexes() {
5078        let client = StoreClient::new("http://127.0.0.1:1");
5079        let existing = IndexSpec::new("status_idx", vec!["status".to_string()])
5080            .expect("valid")
5081            .with_cover_columns(vec!["amount_cents".to_string()]);
5082        let schema = KvSchema::new(client)
5083            .table(
5084                "orders",
5085                vec![
5086                    TableColumnConfig::new("id", DataType::Int64, false),
5087                    TableColumnConfig::new("status", DataType::Utf8, false),
5088                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5089                ],
5090                vec!["id".to_string()],
5091                vec![existing.clone()],
5092            )
5093            .expect("schema");
5094
5095        let report = schema
5096            .backfill_added_indexes("orders", &[existing])
5097            .await
5098            .expect("no-op backfill should succeed");
5099        assert_eq!(report, IndexBackfillReport::default());
5100    }
5101
5102    #[tokio::test]
5103    async fn backfill_added_indexes_rejects_zero_row_batch_size() {
5104        let client = StoreClient::new("http://127.0.0.1:1");
5105        let schema = KvSchema::new(client)
5106            .table(
5107                "orders",
5108                vec![
5109                    TableColumnConfig::new("id", DataType::Int64, false),
5110                    TableColumnConfig::new("status", DataType::Utf8, false),
5111                ],
5112                vec!["id".to_string()],
5113                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5114            )
5115            .expect("schema");
5116        let err = schema
5117            .backfill_added_indexes_with_options(
5118                "orders",
5119                &[],
5120                IndexBackfillOptions {
5121                    row_batch_size: 0,
5122                    start_from_primary_key: None,
5123                },
5124            )
5125            .await
5126            .expect_err("row_batch_size=0 should fail");
5127        assert!(err.to_string().contains("row_batch_size must be > 0"));
5128    }
5129
5130    #[tokio::test]
5131    async fn backfill_added_indexes_emits_progress_events() {
5132        let state = MockState {
5133            kv: Arc::new(Mutex::new(BTreeMap::new())),
5134            range_calls: Arc::new(AtomicUsize::new(0)),
5135            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5136            sequence_number: Arc::new(AtomicU64::new(0)),
5137        };
5138        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5139        let client = StoreClient::new(&base_url);
5140
5141        let seed_schema = KvSchema::new(client.clone())
5142            .table(
5143                "orders",
5144                vec![
5145                    TableColumnConfig::new("id", DataType::Int64, false),
5146                    TableColumnConfig::new("status", DataType::Utf8, false),
5147                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5148                ],
5149                vec!["id".to_string()],
5150                vec![],
5151            )
5152            .expect("seed schema");
5153        let mut writer = seed_schema.batch_writer();
5154        for i in 0..5i64 {
5155            writer
5156                .insert(
5157                    "orders",
5158                    vec![
5159                        CellValue::Int64(i),
5160                        CellValue::Utf8("open".to_string()),
5161                        CellValue::Int64(i * 10),
5162                    ],
5163                )
5164                .expect("seed row");
5165        }
5166        writer.flush().await.expect("seed flush");
5167
5168        let backfill_schema = KvSchema::new(client.clone())
5169            .table(
5170                "orders",
5171                vec![
5172                    TableColumnConfig::new("id", DataType::Int64, false),
5173                    TableColumnConfig::new("status", DataType::Utf8, false),
5174                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5175                ],
5176                vec!["id".to_string()],
5177                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5178            )
5179            .expect("backfill schema");
5180
5181        let (progress_tx, mut progress_rx) = mpsc::unbounded_channel();
5182        let report = backfill_schema
5183            .backfill_added_indexes_with_options_and_progress(
5184                "orders",
5185                &[],
5186                IndexBackfillOptions {
5187                    row_batch_size: 2,
5188                    start_from_primary_key: None,
5189                },
5190                Some(&progress_tx),
5191            )
5192            .await
5193            .expect("backfill should succeed");
5194        drop(progress_tx);
5195
5196        let mut saw_started = false;
5197        let mut saw_completed = false;
5198        let mut progress_events = 0usize;
5199        while let Some(event) = progress_rx.recv().await {
5200            match event {
5201                IndexBackfillEvent::Started {
5202                    table_name,
5203                    indexes_backfilled,
5204                    row_batch_size,
5205                    ..
5206                } => {
5207                    saw_started = true;
5208                    assert_eq!(table_name, "orders");
5209                    assert_eq!(indexes_backfilled, 1);
5210                    assert_eq!(row_batch_size, 2);
5211                }
5212                IndexBackfillEvent::Progress {
5213                    scanned_rows,
5214                    index_entries_written,
5215                    ..
5216                } => {
5217                    progress_events += 1;
5218                    assert!(scanned_rows >= 1);
5219                    assert_eq!(scanned_rows, index_entries_written);
5220                }
5221                IndexBackfillEvent::Completed {
5222                    report: completed_report,
5223                } => {
5224                    saw_completed = true;
5225                    assert_eq!(completed_report, report);
5226                }
5227            }
5228        }
5229        assert!(saw_started);
5230        assert!(saw_completed);
5231        assert!(progress_events >= 1);
5232
5233        let _ = shutdown_tx.send(());
5234    }
5235
5236    #[tokio::test]
5237    async fn backfill_added_indexes_can_resume_from_primary_key() {
5238        let state = MockState {
5239            kv: Arc::new(Mutex::new(BTreeMap::new())),
5240            range_calls: Arc::new(AtomicUsize::new(0)),
5241            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5242            sequence_number: Arc::new(AtomicU64::new(0)),
5243        };
5244        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5245        let client = StoreClient::new(&base_url);
5246
5247        let seed_schema = KvSchema::new(client.clone())
5248            .table(
5249                "orders",
5250                vec![
5251                    TableColumnConfig::new("id", DataType::Int64, false),
5252                    TableColumnConfig::new("status", DataType::Utf8, false),
5253                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5254                ],
5255                vec!["id".to_string()],
5256                vec![],
5257            )
5258            .expect("seed schema");
5259        let mut writer = seed_schema.batch_writer();
5260        for i in 0..6i64 {
5261            writer
5262                .insert(
5263                    "orders",
5264                    vec![
5265                        CellValue::Int64(i),
5266                        CellValue::Utf8("open".to_string()),
5267                        CellValue::Int64(i * 10),
5268                    ],
5269                )
5270                .expect("seed row");
5271        }
5272        writer.flush().await.expect("seed flush");
5273
5274        let backfill_schema = KvSchema::new(client.clone())
5275            .table(
5276                "orders",
5277                vec![
5278                    TableColumnConfig::new("id", DataType::Int64, false),
5279                    TableColumnConfig::new("status", DataType::Utf8, false),
5280                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5281                ],
5282                vec!["id".to_string()],
5283                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5284            )
5285            .expect("backfill schema");
5286
5287        let config = KvTableConfig::new(
5288            0,
5289            vec![
5290                TableColumnConfig::new("id", DataType::Int64, false),
5291                TableColumnConfig::new("status", DataType::Utf8, false),
5292                TableColumnConfig::new("amount_cents", DataType::Int64, false),
5293            ],
5294            vec!["id".to_string()],
5295            vec![],
5296        )
5297        .expect("valid config");
5298        let model = TableModel::from_config(&config).expect("model");
5299        let resume_value = CellValue::Int64(3);
5300        let resume_key =
5301            encode_primary_key(model.table_prefix, &[&resume_value], &model).expect("resume key");
5302
5303        let report = backfill_schema
5304            .backfill_added_indexes_with_options(
5305                "orders",
5306                &[],
5307                IndexBackfillOptions {
5308                    row_batch_size: 2,
5309                    start_from_primary_key: Some(resume_key.clone()),
5310                },
5311            )
5312            .await
5313            .expect("resume backfill should succeed");
5314        assert_eq!(report.scanned_rows, 3);
5315        assert_eq!(report.index_entries_written, 3);
5316
5317        {
5318            let guard = state.kv.lock().expect("kv mutex poisoned");
5319            let index_rows = guard
5320                .keys()
5321                .filter(|key| matches_secondary_index_key(0, 1, key))
5322                .count();
5323            assert_eq!(index_rows, 3);
5324        }
5325
5326        let resume_payload = model
5327            .primary_key_codec
5328            .read_payload(&resume_key, 0, model.primary_key_width)
5329            .expect("resume payload");
5330        let wrong_prefix = secondary_index_codec(model.table_prefix, 1)
5331            .expect("secondary codec")
5332            .encode(&resume_payload)
5333            .expect("wrong prefix key");
5334        let err = backfill_schema
5335            .backfill_added_indexes_with_options(
5336                "orders",
5337                &[],
5338                IndexBackfillOptions {
5339                    row_batch_size: 2,
5340                    start_from_primary_key: Some(wrong_prefix),
5341                },
5342            )
5343            .await
5344            .expect_err("wrong key prefix must be rejected");
5345        assert!(err.to_string().contains("primary-key prefix"));
5346
5347        let _ = shutdown_tx.send(());
5348    }
5349
5350    #[tokio::test]
5351    async fn covering_index_scan_fails_closed_when_covering_payload_missing() {
5352        let state = MockState {
5353            kv: Arc::new(Mutex::new(BTreeMap::new())),
5354            range_calls: Arc::new(AtomicUsize::new(0)),
5355            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5356            sequence_number: Arc::new(AtomicU64::new(0)),
5357        };
5358        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5359        let client = StoreClient::new(&base_url);
5360
5361        let schema = KvSchema::new(client.clone())
5362            .table(
5363                "orders",
5364                vec![
5365                    TableColumnConfig::new("id", DataType::Int64, false),
5366                    TableColumnConfig::new("status", DataType::Utf8, false),
5367                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5368                ],
5369                vec!["id".to_string()],
5370                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
5371                    .expect("valid")
5372                    .with_cover_columns(vec!["amount_cents".to_string()])],
5373            )
5374            .expect("schema");
5375        let mut writer = schema.batch_writer();
5376        for id in 0..4i64 {
5377            writer
5378                .insert(
5379                    "orders",
5380                    vec![
5381                        CellValue::Int64(id),
5382                        CellValue::Utf8("open".to_string()),
5383                        CellValue::Int64(id * 10),
5384                    ],
5385                )
5386                .expect("row");
5387        }
5388        writer.flush().await.expect("flush");
5389
5390        {
5391            let mut guard = state.kv.lock().expect("kv mutex poisoned");
5392            let key = guard
5393                .keys()
5394                .find(|key| matches_secondary_index_key(0, 1, key))
5395                .expect("index row should exist")
5396                .clone();
5397            guard.insert(key, Bytes::new());
5398        }
5399
5400        let ctx = SessionContext::new();
5401        schema.register_all(&ctx).expect("register");
5402        let df = ctx
5403            .sql("SELECT amount_cents FROM orders WHERE status = 'open'")
5404            .await
5405            .expect("query should plan");
5406        let err = df
5407            .collect()
5408            .await
5409            .expect_err("missing covering payload must fail closed");
5410        assert!(err
5411            .to_string()
5412            .contains("secondary index entry missing covering payload"));
5413
5414        let _ = shutdown_tx.send(());
5415    }
5416
5417    #[tokio::test]
5418    async fn covering_index_scan_fails_closed_when_covering_payload_is_corrupt() {
5419        let state = MockState {
5420            kv: Arc::new(Mutex::new(BTreeMap::new())),
5421            range_calls: Arc::new(AtomicUsize::new(0)),
5422            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5423            sequence_number: Arc::new(AtomicU64::new(0)),
5424        };
5425        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5426        let client = StoreClient::new(&base_url);
5427
5428        let schema = KvSchema::new(client.clone())
5429            .table(
5430                "orders",
5431                vec![
5432                    TableColumnConfig::new("id", DataType::Int64, false),
5433                    TableColumnConfig::new("status", DataType::Utf8, false),
5434                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5435                ],
5436                vec!["id".to_string()],
5437                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
5438                    .expect("valid")
5439                    .with_cover_columns(vec!["amount_cents".to_string()])],
5440            )
5441            .expect("schema");
5442        let mut writer = schema.batch_writer();
5443        for id in 0..4i64 {
5444            writer
5445                .insert(
5446                    "orders",
5447                    vec![
5448                        CellValue::Int64(id),
5449                        CellValue::Utf8("open".to_string()),
5450                        CellValue::Int64(id * 10),
5451                    ],
5452                )
5453                .expect("row");
5454        }
5455        writer.flush().await.expect("flush");
5456
5457        {
5458            let mut guard = state.kv.lock().expect("kv mutex poisoned");
5459            let key = guard
5460                .keys()
5461                .find(|key| matches_secondary_index_key(0, 1, key))
5462                .expect("index row should exist")
5463                .clone();
5464            guard.insert(key, Bytes::from_static(b"not-codec"));
5465        }
5466
5467        let ctx = SessionContext::new();
5468        schema.register_all(&ctx).expect("register");
5469        let df = ctx
5470            .sql("SELECT amount_cents FROM orders WHERE status = 'open'")
5471            .await
5472            .expect("query should plan");
5473        let err = df
5474            .collect()
5475            .await
5476            .expect_err("corrupt covering payload must fail closed");
5477        assert!(err.to_string().contains("invalid covering index payload"));
5478
5479        let _ = shutdown_tx.send(());
5480    }
5481
5482    #[tokio::test]
5483    async fn non_covering_index_uses_point_lookup_instead_of_full_scan() {
5484        let state = MockState {
5485            kv: Arc::new(Mutex::new(BTreeMap::new())),
5486            range_calls: Arc::new(AtomicUsize::new(0)),
5487            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5488            sequence_number: Arc::new(AtomicU64::new(0)),
5489        };
5490        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5491        let client = StoreClient::new(&base_url);
5492
5493        let schema = KvSchema::new(client.clone())
5494            .table(
5495                "orders",
5496                vec![
5497                    TableColumnConfig::new("id", DataType::Int64, false),
5498                    TableColumnConfig::new("status", DataType::Utf8, false),
5499                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5500                    TableColumnConfig::new("notes", DataType::Utf8, true),
5501                ],
5502                vec!["id".to_string()],
5503                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5504            )
5505            .expect("schema");
5506        let mut writer = schema.batch_writer();
5507        writer
5508            .insert(
5509                "orders",
5510                vec![
5511                    CellValue::Int64(1),
5512                    CellValue::Utf8("open".to_string()),
5513                    CellValue::Int64(100),
5514                    CellValue::Utf8("first".to_string()),
5515                ],
5516            )
5517            .expect("row");
5518        writer
5519            .insert(
5520                "orders",
5521                vec![
5522                    CellValue::Int64(2),
5523                    CellValue::Utf8("closed".to_string()),
5524                    CellValue::Int64(200),
5525                    CellValue::Utf8("second".to_string()),
5526                ],
5527            )
5528            .expect("row");
5529        writer
5530            .insert(
5531                "orders",
5532                vec![
5533                    CellValue::Int64(3),
5534                    CellValue::Utf8("open".to_string()),
5535                    CellValue::Int64(300),
5536                    CellValue::Utf8("third".to_string()),
5537                ],
5538            )
5539            .expect("row");
5540        writer.flush().await.expect("flush");
5541
5542        let ctx = SessionContext::new();
5543        schema.register_all(&ctx).expect("register");
5544
5545        let df = ctx
5546            .sql("SELECT id, notes FROM orders WHERE status = 'open' ORDER BY id")
5547            .await
5548            .expect("plan");
5549        let batches = df.collect().await.expect("non-covering index lookup");
5550        let ids: Vec<i64> = batches
5551            .iter()
5552            .flat_map(|b| {
5553                b.column(0)
5554                    .as_any()
5555                    .downcast_ref::<datafusion::arrow::array::Int64Array>()
5556                    .unwrap()
5557                    .iter()
5558                    .map(|v| v.unwrap())
5559            })
5560            .collect();
5561        let notes: Vec<String> = batches
5562            .iter()
5563            .flat_map(|b| {
5564                b.column(1)
5565                    .as_any()
5566                    .downcast_ref::<datafusion::arrow::array::StringArray>()
5567                    .unwrap()
5568                    .iter()
5569                    .map(|v| v.unwrap().to_string())
5570            })
5571            .collect();
5572        assert_eq!(ids, vec![1, 3]);
5573        assert_eq!(notes, vec!["first", "third"]);
5574
5575        let _ = shutdown_tx.send(());
5576    }
5577
5578    #[tokio::test]
5579    async fn backfill_resume_cursor_can_continue_without_skips_or_duplicates() {
5580        let state = MockState {
5581            kv: Arc::new(Mutex::new(BTreeMap::new())),
5582            range_calls: Arc::new(AtomicUsize::new(0)),
5583            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5584            sequence_number: Arc::new(AtomicU64::new(0)),
5585        };
5586        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5587        let client = StoreClient::new(&base_url);
5588
5589        let seed_schema = KvSchema::new(client.clone())
5590            .table(
5591                "orders",
5592                vec![
5593                    TableColumnConfig::new("id", DataType::Int64, false),
5594                    TableColumnConfig::new("status", DataType::Utf8, false),
5595                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5596                ],
5597                vec!["id".to_string()],
5598                vec![],
5599            )
5600            .expect("seed schema");
5601        let mut writer = seed_schema.batch_writer();
5602        for i in 0..8i64 {
5603            writer
5604                .insert(
5605                    "orders",
5606                    vec![
5607                        CellValue::Int64(i),
5608                        CellValue::Utf8("open".to_string()),
5609                        CellValue::Int64(i * 10),
5610                    ],
5611                )
5612                .expect("seed row");
5613        }
5614        writer.flush().await.expect("seed flush");
5615
5616        let backfill_schema = KvSchema::new(client.clone())
5617            .table(
5618                "orders",
5619                vec![
5620                    TableColumnConfig::new("id", DataType::Int64, false),
5621                    TableColumnConfig::new("status", DataType::Utf8, false),
5622                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5623                ],
5624                vec!["id".to_string()],
5625                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5626            )
5627            .expect("backfill schema");
5628
5629        let task_schema = KvSchema::new(client.clone())
5630            .table(
5631                "orders",
5632                vec![
5633                    TableColumnConfig::new("id", DataType::Int64, false),
5634                    TableColumnConfig::new("status", DataType::Utf8, false),
5635                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5636                ],
5637                vec!["id".to_string()],
5638                vec![IndexSpec::new("status_idx", vec!["status".to_string()]).expect("valid")],
5639            )
5640            .expect("task schema");
5641        let (progress_tx, mut progress_rx) = mpsc::unbounded_channel();
5642        let handle = tokio::spawn(async move {
5643            task_schema
5644                .backfill_added_indexes_with_options_and_progress(
5645                    "orders",
5646                    &[],
5647                    IndexBackfillOptions {
5648                        row_batch_size: 2,
5649                        start_from_primary_key: None,
5650                    },
5651                    Some(&progress_tx),
5652                )
5653                .await
5654        });
5655
5656        let mut resume_cursor = None;
5657        while let Some(event) = progress_rx.recv().await {
5658            if let IndexBackfillEvent::Progress { next_cursor, .. } = event {
5659                resume_cursor = next_cursor;
5660                break;
5661            }
5662        }
5663        handle.abort();
5664        let resume_cursor =
5665            resume_cursor.expect("first progress event should provide resume cursor");
5666
5667        let report = backfill_schema
5668            .backfill_added_indexes_with_options(
5669                "orders",
5670                &[],
5671                IndexBackfillOptions {
5672                    row_batch_size: 2,
5673                    start_from_primary_key: Some(resume_cursor),
5674                },
5675            )
5676            .await
5677            .expect("resume backfill should succeed");
5678        assert_eq!(report.scanned_rows, 6);
5679
5680        let guard = state.kv.lock().expect("kv mutex poisoned");
5681        let base_rows = guard
5682            .keys()
5683            .filter(|key| matches_primary_key(0, key))
5684            .count();
5685        let index_rows = guard
5686            .keys()
5687            .filter(|key| matches_secondary_index_key(0, 1, key))
5688            .count();
5689        assert_eq!(base_rows, 8);
5690        assert_eq!(
5691            index_rows, 8,
5692            "resume should backfill each row exactly once"
5693        );
5694
5695        let _ = shutdown_tx.send(());
5696    }
5697
5698    #[tokio::test]
5699    async fn concurrent_writes_during_backfill_preserve_index_correctness() {
5700        let state = MockState {
5701            kv: Arc::new(Mutex::new(BTreeMap::new())),
5702            range_calls: Arc::new(AtomicUsize::new(0)),
5703            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
5704            sequence_number: Arc::new(AtomicU64::new(0)),
5705        };
5706        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
5707        let client = StoreClient::new(&base_url);
5708
5709        let seed_schema = KvSchema::new(client.clone())
5710            .table(
5711                "orders",
5712                vec![
5713                    TableColumnConfig::new("id", DataType::Int64, false),
5714                    TableColumnConfig::new("status", DataType::Utf8, false),
5715                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5716                ],
5717                vec!["id".to_string()],
5718                vec![],
5719            )
5720            .expect("seed schema");
5721        let mut seed_writer = seed_schema.batch_writer();
5722        for i in 0..40i64 {
5723            seed_writer
5724                .insert(
5725                    "orders",
5726                    vec![
5727                        CellValue::Int64(i),
5728                        CellValue::Utf8(if i % 2 == 0 { "open" } else { "closed" }.to_string()),
5729                        CellValue::Int64(i * 10),
5730                    ],
5731                )
5732                .expect("seed row");
5733        }
5734        seed_writer.flush().await.expect("seed flush");
5735
5736        let backfill_schema = KvSchema::new(client.clone())
5737            .table(
5738                "orders",
5739                vec![
5740                    TableColumnConfig::new("id", DataType::Int64, false),
5741                    TableColumnConfig::new("status", DataType::Utf8, false),
5742                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5743                ],
5744                vec!["id".to_string()],
5745                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
5746                    .expect("valid")
5747                    .with_cover_columns(vec!["amount_cents".to_string()])],
5748            )
5749            .expect("backfill schema");
5750
5751        let task_schema = KvSchema::new(client.clone())
5752            .table(
5753                "orders",
5754                vec![
5755                    TableColumnConfig::new("id", DataType::Int64, false),
5756                    TableColumnConfig::new("status", DataType::Utf8, false),
5757                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
5758                ],
5759                vec!["id".to_string()],
5760                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
5761                    .expect("valid")
5762                    .with_cover_columns(vec!["amount_cents".to_string()])],
5763            )
5764            .expect("task schema");
5765        let (progress_tx, mut progress_rx) = mpsc::unbounded_channel();
5766        let handle = tokio::spawn(async move {
5767            task_schema
5768                .backfill_added_indexes_with_options_and_progress(
5769                    "orders",
5770                    &[],
5771                    IndexBackfillOptions {
5772                        row_batch_size: 5,
5773                        start_from_primary_key: None,
5774                    },
5775                    Some(&progress_tx),
5776                )
5777                .await
5778        });
5779
5780        while let Some(event) = progress_rx.recv().await {
5781            if matches!(event, IndexBackfillEvent::Progress { .. }) {
5782                break;
5783            }
5784        }
5785
5786        let mut concurrent_writer = backfill_schema.batch_writer();
5787        for id in [100i64, 101i64] {
5788            concurrent_writer
5789                .insert(
5790                    "orders",
5791                    vec![
5792                        CellValue::Int64(id),
5793                        CellValue::Utf8("open".to_string()),
5794                        CellValue::Int64(id * 10),
5795                    ],
5796                )
5797                .expect("concurrent row");
5798        }
5799        concurrent_writer.flush().await.expect("concurrent flush");
5800
5801        let report = handle
5802            .await
5803            .expect("backfill task join")
5804            .expect("backfill result");
5805        assert!(
5806            report.scanned_rows >= 40,
5807            "backfill should at least scan the original historical rows"
5808        );
5809
5810        let guard = state.kv.lock().expect("kv mutex poisoned");
5811        let base_rows = guard
5812            .keys()
5813            .filter(|key| matches_primary_key(0, key))
5814            .count();
5815        let index_rows = guard
5816            .keys()
5817            .filter(|key| matches_secondary_index_key(0, 1, key))
5818            .count();
5819        assert_eq!(base_rows, 42);
5820        assert_eq!(
5821            index_rows, 42,
5822            "historical backfill plus concurrent indexed writes should leave one index row per base row"
5823        );
5824
5825        let _ = shutdown_tx.send(());
5826    }
5827
5828    #[derive(Clone)]
5829    struct DeferredChunkRangeHarness {
5830        first_chunk_sent: Arc<Notify>,
5831        release_second_chunk: Arc<Notify>,
5832        first_frame: ProtoRangeFrame,
5833        second_frame: ProtoRangeFrame,
5834    }
5835
5836    impl QueryService for DeferredChunkRangeHarness {
5837        async fn get(
5838            &self,
5839            _ctx: Context,
5840            _request: buffa::view::OwnedView<
5841                exoware_sdk::store::query::v1::GetRequestView<'static>,
5842            >,
5843        ) -> connectrpc::ServiceResult<ProtoGetResponse> {
5844            Err(ConnectError::unimplemented("test harness"))
5845        }
5846
5847        async fn get_many(
5848            &self,
5849            _ctx: Context,
5850            _request: buffa::view::OwnedView<
5851                exoware_sdk::store::query::v1::GetManyRequestView<'static>,
5852            >,
5853        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoGetManyFrame>> {
5854            Err(ConnectError::unimplemented("test harness"))
5855        }
5856
5857        async fn range(
5858            &self,
5859            _ctx: Context,
5860            _request: buffa::view::OwnedView<
5861                exoware_sdk::store::query::v1::RangeRequestView<'static>,
5862            >,
5863        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoRangeFrame>> {
5864            let first_chunk_sent = self.first_chunk_sent.clone();
5865            let release_second_chunk = self.release_second_chunk.clone();
5866            let first_frame = self.first_frame.clone();
5867            let second_frame = self.second_frame.clone();
5868            let stream = stream::try_unfold(0u8, move |state| {
5869                let first_chunk_sent = first_chunk_sent.clone();
5870                let release_second_chunk = release_second_chunk.clone();
5871                let first_frame = first_frame.clone();
5872                let second_frame = second_frame.clone();
5873                async move {
5874                    match state {
5875                        0 => {
5876                            first_chunk_sent.notify_one();
5877                            Ok(Some((first_frame, 1)))
5878                        }
5879                        1 => {
5880                            release_second_chunk.notified().await;
5881                            Ok(Some((second_frame, 2)))
5882                        }
5883                        _ => Ok(None),
5884                    }
5885                }
5886            });
5887            Ok(connectrpc::Response::stream(stream))
5888        }
5889
5890        async fn reduce(
5891            &self,
5892            _ctx: Context,
5893            _request: buffa::view::OwnedView<
5894                exoware_sdk::store::query::v1::ReduceRequestView<'static>,
5895            >,
5896        ) -> connectrpc::ServiceResult<ProtoReduceResponse> {
5897            Err(ConnectError::unimplemented("test harness"))
5898        }
5899    }
5900
5901    #[derive(Clone)]
5902    struct ObservedLimitRangeHarness {
5903        release_second_chunk: Arc<Notify>,
5904        observed_limit: Arc<AtomicUsize>,
5905        first_frame: ProtoRangeFrame,
5906        second_frame: ProtoRangeFrame,
5907    }
5908
5909    impl QueryService for ObservedLimitRangeHarness {
5910        async fn get(
5911            &self,
5912            _ctx: Context,
5913            _request: buffa::view::OwnedView<
5914                exoware_sdk::store::query::v1::GetRequestView<'static>,
5915            >,
5916        ) -> connectrpc::ServiceResult<ProtoGetResponse> {
5917            Err(ConnectError::unimplemented("test harness"))
5918        }
5919
5920        async fn get_many(
5921            &self,
5922            _ctx: Context,
5923            _request: buffa::view::OwnedView<
5924                exoware_sdk::store::query::v1::GetManyRequestView<'static>,
5925            >,
5926        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoGetManyFrame>> {
5927            Err(ConnectError::unimplemented("test harness"))
5928        }
5929
5930        async fn range(
5931            &self,
5932            _ctx: Context,
5933            request: buffa::view::OwnedView<
5934                exoware_sdk::store::query::v1::RangeRequestView<'static>,
5935            >,
5936        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoRangeFrame>> {
5937            let limit = request.limit.map(|v| v as usize).unwrap_or(usize::MAX);
5938            self.observed_limit.store(limit, AtomicOrdering::SeqCst);
5939            let release_second_chunk = self.release_second_chunk.clone();
5940            let first_frame = self.first_frame.clone();
5941            let second_frame = self.second_frame.clone();
5942            let stream = stream::try_unfold(0u8, move |state| {
5943                let release_second_chunk = release_second_chunk.clone();
5944                let first_frame = first_frame.clone();
5945                let second_frame = second_frame.clone();
5946                async move {
5947                    match state {
5948                        0 => Ok(Some((first_frame, 1))),
5949                        1 => {
5950                            if limit > 1 {
5951                                release_second_chunk.notified().await;
5952                                Ok(Some((second_frame, 2)))
5953                            } else {
5954                                Ok(None)
5955                            }
5956                        }
5957                        _ => Ok(None),
5958                    }
5959                }
5960            });
5961            Ok(connectrpc::Response::stream(stream))
5962        }
5963
5964        async fn reduce(
5965            &self,
5966            _ctx: Context,
5967            _request: buffa::view::OwnedView<
5968                exoware_sdk::store::query::v1::ReduceRequestView<'static>,
5969            >,
5970        ) -> connectrpc::ServiceResult<ProtoReduceResponse> {
5971            Err(ConnectError::unimplemented("test harness"))
5972        }
5973    }
5974
5975    #[derive(Clone)]
5976    struct ObservedLimitIndexRangeHarness {
5977        observed_limit: Arc<AtomicUsize>,
5978        entries_frame: ProtoRangeFrame,
5979    }
5980
5981    impl QueryService for ObservedLimitIndexRangeHarness {
5982        async fn get(
5983            &self,
5984            _ctx: Context,
5985            _request: buffa::view::OwnedView<
5986                exoware_sdk::store::query::v1::GetRequestView<'static>,
5987            >,
5988        ) -> connectrpc::ServiceResult<ProtoGetResponse> {
5989            Err(ConnectError::unimplemented("test harness"))
5990        }
5991
5992        async fn get_many(
5993            &self,
5994            _ctx: Context,
5995            _request: buffa::view::OwnedView<
5996                exoware_sdk::store::query::v1::GetManyRequestView<'static>,
5997            >,
5998        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoGetManyFrame>> {
5999            Err(ConnectError::unimplemented("test harness"))
6000        }
6001
6002        async fn range(
6003            &self,
6004            _ctx: Context,
6005            request: buffa::view::OwnedView<
6006                exoware_sdk::store::query::v1::RangeRequestView<'static>,
6007            >,
6008        ) -> connectrpc::ServiceResult<connectrpc::ServiceStream<ProtoRangeFrame>> {
6009            let limit = request
6010                .limit
6011                .map(|v| {
6012                    if v == u32::MAX {
6013                        usize::MAX
6014                    } else {
6015                        v as usize
6016                    }
6017                })
6018                .unwrap_or(usize::MAX);
6019            self.observed_limit.store(limit, AtomicOrdering::SeqCst);
6020            let entries_frame = self.entries_frame.clone();
6021            Ok(connectrpc::Response::stream(stream::iter(vec![Ok(
6022                entries_frame,
6023            )])))
6024        }
6025
6026        async fn reduce(
6027            &self,
6028            _ctx: Context,
6029            _request: buffa::view::OwnedView<
6030                exoware_sdk::store::query::v1::ReduceRequestView<'static>,
6031            >,
6032        ) -> connectrpc::ServiceResult<ProtoReduceResponse> {
6033            Err(ConnectError::unimplemented("test harness"))
6034        }
6035    }
6036
6037    #[tokio::test]
6038    async fn kv_scan_streaming_range_reads_emit_first_batch_before_full_range_completes() {
6039        let model = Arc::new(simple_int64_model(0));
6040        let first_chunk_sent = Arc::new(Notify::new());
6041        let release_second_chunk = Arc::new(Notify::new());
6042
6043        let encoded_row = (StoredRow { values: vec![None] }).encode().to_vec();
6044
6045        let first_results = {
6046            let mut results = Vec::with_capacity(BATCH_FLUSH_ROWS);
6047            for id in 0..BATCH_FLUSH_ROWS {
6048                let key =
6049                    encode_primary_key(model.table_prefix, &[&CellValue::Int64(id as i64)], &model)
6050                        .expect("primary key");
6051                results.push((key, encoded_row.clone()));
6052            }
6053            results
6054        };
6055        let first_frame = proto_range_entries_frame(first_results);
6056
6057        let second_results = {
6058            let key = encode_primary_key(
6059                model.table_prefix,
6060                &[&CellValue::Int64(BATCH_FLUSH_ROWS as i64)],
6061                &model,
6062            )
6063            .expect("primary key");
6064            vec![(key, encoded_row)]
6065        };
6066        let second_frame = proto_range_entries_frame(second_results);
6067
6068        let harness = DeferredChunkRangeHarness {
6069            first_chunk_sent: first_chunk_sent.clone(),
6070            release_second_chunk: release_second_chunk.clone(),
6071            first_frame,
6072            second_frame,
6073        };
6074        let connect = ConnectRpcService::new(QueryServiceServer::new(harness))
6075            .with_compression(connect_compression_registry());
6076        let app = Router::new().fallback_service(connect);
6077
6078        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
6079            .await
6080            .expect("bind test listener");
6081        let url = format!("http://{}", listener.local_addr().expect("listener addr"));
6082        tokio::spawn(async move {
6083            axum::serve(listener, app).await.expect("serve test app");
6084        });
6085
6086        let client = StoreClient::new(&url);
6087        let scan = KvScanExec::new(
6088            client,
6089            model.clone(),
6090            Arc::new(Vec::new()),
6091            QueryPredicate::default(),
6092            None,
6093            model.schema.clone(),
6094            None,
6095        );
6096
6097        let session_ctx = SessionContext::new();
6098        let mut stream = scan
6099            .execute(0, session_ctx.task_ctx())
6100            .expect("scan execute should start");
6101
6102        tokio::time::timeout(Duration::from_secs(1), first_chunk_sent.notified())
6103            .await
6104            .expect("server should send first range frame");
6105        let first_batch = tokio::time::timeout(Duration::from_millis(200), stream.try_next())
6106            .await
6107            .expect("first record batch should arrive before the second stream chunk is released")
6108            .expect("stream poll should succeed")
6109            .expect("expected first record batch");
6110        assert_eq!(first_batch.num_rows(), BATCH_FLUSH_ROWS);
6111
6112        release_second_chunk.notify_one();
6113
6114        let second_batch = stream
6115            .try_next()
6116            .await
6117            .expect("second poll should succeed")
6118            .expect("expected second record batch");
6119        assert_eq!(second_batch.num_rows(), 1);
6120        assert!(
6121            stream
6122                .try_next()
6123                .await
6124                .expect("stream completion poll")
6125                .is_none(),
6126            "stream should finish after the second batch"
6127        );
6128    }
6129
6130    #[tokio::test]
6131    async fn kv_scan_sql_limit_is_pushed_upstream_on_exact_streaming_scan() {
6132        let release_second_chunk = Arc::new(Notify::new());
6133        let observed_limit = Arc::new(AtomicUsize::new(0));
6134        let model = simple_int64_model(0);
6135
6136        let encoded_row = (StoredRow { values: vec![None] }).encode().to_vec();
6137
6138        let first_key = encode_primary_key(model.table_prefix, &[&CellValue::Int64(1)], &model)
6139            .expect("first primary key");
6140        let second_key = encode_primary_key(model.table_prefix, &[&CellValue::Int64(2)], &model)
6141            .expect("second primary key");
6142
6143        let first_frame = proto_range_entries_frame(vec![(first_key, encoded_row.clone())]);
6144        let second_frame = proto_range_entries_frame(vec![(second_key, encoded_row)]);
6145
6146        let harness = ObservedLimitRangeHarness {
6147            release_second_chunk: release_second_chunk.clone(),
6148            observed_limit: observed_limit.clone(),
6149            first_frame,
6150            second_frame,
6151        };
6152        let connect = ConnectRpcService::new(QueryServiceServer::new(harness))
6153            .with_compression(connect_compression_registry());
6154        let app = Router::new().fallback_service(connect);
6155
6156        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
6157            .await
6158            .expect("bind test listener");
6159        let url = format!("http://{}", listener.local_addr().expect("listener addr"));
6160        tokio::spawn(async move {
6161            axum::serve(listener, app).await.expect("serve test app");
6162        });
6163
6164        let client = StoreClient::new(&url);
6165        let schema = KvSchema::new(client)
6166            .table(
6167                "items",
6168                vec![TableColumnConfig::new("id", DataType::Int64, false)],
6169                vec!["id".to_string()],
6170                vec![],
6171            )
6172            .expect("schema");
6173        let ctx = SessionContext::new();
6174        schema.register_all(&ctx).expect("register");
6175
6176        let batches = tokio::time::timeout(Duration::from_millis(200), async {
6177            ctx.sql("SELECT id FROM items LIMIT 1")
6178                .await
6179                .expect("query")
6180                .collect()
6181                .await
6182                .expect("collect")
6183        })
6184        .await
6185        .expect("query with LIMIT 1 should finish without waiting for a delayed second chunk");
6186
6187        assert_eq!(
6188            batches.iter().map(|batch| batch.num_rows()).sum::<usize>(),
6189            1
6190        );
6191        assert_eq!(
6192            observed_limit.load(AtomicOrdering::SeqCst),
6193            1,
6194            "exact streaming scan should push SQL LIMIT upstream"
6195        );
6196        release_second_chunk.notify_one();
6197    }
6198
6199    #[tokio::test]
6200    async fn kv_scan_index_limit_does_not_push_upstream_when_seen_dedup_can_drop_entries() {
6201        let observed_limit = Arc::new(AtomicUsize::new(0));
6202        let config = KvTableConfig::new(
6203            0,
6204            vec![
6205                TableColumnConfig::new("id", DataType::Int64, false),
6206                TableColumnConfig::new("status", DataType::Utf8, false),
6207                TableColumnConfig::new("amount_cents", DataType::Int64, false),
6208            ],
6209            vec!["id".to_string()],
6210            vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6211                .expect("valid")
6212                .with_cover_columns(vec!["status".to_string(), "amount_cents".to_string()])],
6213        )
6214        .expect("config");
6215        let model = TableModel::from_config(&config).expect("model");
6216        let spec = model
6217            .resolve_index_specs(&config.index_specs)
6218            .expect("specs")
6219            .into_iter()
6220            .next()
6221            .expect("status index spec");
6222        let stale_row = KvRow {
6223            values: vec![
6224                CellValue::Int64(7),
6225                CellValue::Utf8("closed".to_string()),
6226                CellValue::Int64(10),
6227            ],
6228        };
6229        let current_row = KvRow {
6230            values: vec![
6231                CellValue::Int64(7),
6232                CellValue::Utf8("open".to_string()),
6233                CellValue::Int64(10),
6234            ],
6235        };
6236        let unique_row = KvRow {
6237            values: vec![
6238                CellValue::Int64(8),
6239                CellValue::Utf8("open".to_string()),
6240                CellValue::Int64(20),
6241            ],
6242        };
6243        let stale_key = encode_secondary_index_key(model.table_prefix, &spec, &model, &stale_row)
6244            .expect("stale index key");
6245        let current_key =
6246            encode_secondary_index_key(model.table_prefix, &spec, &model, &current_row)
6247                .expect("current index key");
6248        let unique_key = encode_secondary_index_key(model.table_prefix, &spec, &model, &unique_row)
6249            .expect("unique index key");
6250        let stale_payload =
6251            encode_secondary_index_value(&stale_row, &model, &spec).expect("stale payload");
6252        let current_payload =
6253            encode_secondary_index_value(&current_row, &model, &spec).expect("current payload");
6254        let unique_payload =
6255            encode_secondary_index_value(&unique_row, &model, &spec).expect("unique payload");
6256
6257        let entries_frame = proto_range_entries_frame(vec![
6258            (stale_key, stale_payload),
6259            (current_key, current_payload),
6260            (unique_key, unique_payload),
6261        ]);
6262        let harness = ObservedLimitIndexRangeHarness {
6263            observed_limit: observed_limit.clone(),
6264            entries_frame,
6265        };
6266        let connect = ConnectRpcService::new(QueryServiceServer::new(harness))
6267            .with_compression(connect_compression_registry());
6268        let app = Router::new().fallback_service(connect);
6269
6270        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
6271            .await
6272            .expect("bind test listener");
6273        let url = format!("http://{}", listener.local_addr().expect("listener addr"));
6274        tokio::spawn(async move {
6275            axum::serve(listener, app).await.expect("serve test app");
6276        });
6277
6278        let client = StoreClient::new(&url);
6279        let schema = KvSchema::new(client)
6280            .table(
6281                "orders",
6282                vec![
6283                    TableColumnConfig::new("id", DataType::Int64, false),
6284                    TableColumnConfig::new("status", DataType::Utf8, false),
6285                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
6286                ],
6287                vec!["id".to_string()],
6288                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6289                    .expect("valid")
6290                    .with_cover_columns(vec!["status".to_string(), "amount_cents".to_string()])],
6291            )
6292            .expect("schema");
6293        let ctx = SessionContext::new();
6294        schema.register_all(&ctx).expect("register");
6295
6296        let batches = ctx
6297            .sql(
6298                "SELECT id, amount_cents \
6299                 FROM orders \
6300                 WHERE status IN ('open', 'closed') \
6301                 LIMIT 2",
6302            )
6303            .await
6304            .expect("query")
6305            .collect()
6306            .await
6307            .expect("collect");
6308
6309        assert_eq!(
6310            batches.iter().map(|batch| batch.num_rows()).sum::<usize>(),
6311            2
6312        );
6313        assert_eq!(
6314            observed_limit.load(AtomicOrdering::SeqCst),
6315            usize::MAX,
6316            "index streaming scans should not push SQL LIMIT upstream while seen-dedup can drop duplicate primary keys"
6317        );
6318    }
6319
6320    #[tokio::test]
6321    async fn zorder_covering_index_scan_filters_false_positive_morton_span_rows() {
6322        let state = MockState {
6323            kv: Arc::new(Mutex::new(BTreeMap::new())),
6324            range_calls: Arc::new(AtomicUsize::new(0)),
6325            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6326            sequence_number: Arc::new(AtomicU64::new(0)),
6327        };
6328        let (base_url, shutdown_tx) = spawn_mock_server(state).await;
6329        let client = StoreClient::new(&base_url);
6330
6331        let schema = KvSchema::new(client)
6332            .table(
6333                "points",
6334                vec![
6335                    TableColumnConfig::new("x", DataType::Int64, false),
6336                    TableColumnConfig::new("y", DataType::Int64, false),
6337                    TableColumnConfig::new("id", DataType::Int64, false),
6338                    TableColumnConfig::new("value", DataType::Int64, false),
6339                ],
6340                vec!["id".to_string()],
6341                vec![
6342                    IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()])
6343                        .expect("valid")
6344                        .with_cover_columns(vec!["value".to_string()]),
6345                ],
6346            )
6347            .expect("schema");
6348
6349        let mut writer = schema.batch_writer();
6350        for (x, y, id, value) in [
6351            (0, 2, 2, 20),
6352            (1, 1, 11, 110),
6353            (1, 2, 12, 120),
6354            (2, 1, 21, 210),
6355            (2, 2, 22, 220),
6356            (3, 0, 30, 300),
6357        ] {
6358            writer
6359                .insert(
6360                    "points",
6361                    vec![
6362                        CellValue::Int64(x),
6363                        CellValue::Int64(y),
6364                        CellValue::Int64(id),
6365                        CellValue::Int64(value),
6366                    ],
6367                )
6368                .expect("row");
6369        }
6370        writer.flush().await.expect("flush");
6371
6372        let ctx = SessionContext::new();
6373        schema.register_all(&ctx).expect("register");
6374
6375        let batches = ctx
6376            .sql(
6377                "SELECT id, value FROM points \
6378                 WHERE x >= 1 AND x <= 2 AND y >= 1 AND y <= 2 \
6379                 ORDER BY id",
6380            )
6381            .await
6382            .expect("query")
6383            .collect()
6384            .await
6385            .expect("collect");
6386
6387        let mut rows = Vec::new();
6388        for batch in &batches {
6389            let ids = batch
6390                .column(0)
6391                .as_any()
6392                .downcast_ref::<Int64Array>()
6393                .expect("id int64");
6394            let values = batch
6395                .column(1)
6396                .as_any()
6397                .downcast_ref::<Int64Array>()
6398                .expect("value int64");
6399            for row_idx in 0..batch.num_rows() {
6400                rows.push((ids.value(row_idx), values.value(row_idx)));
6401            }
6402        }
6403        assert_eq!(rows, vec![(11, 110), (12, 120), (21, 210), (22, 220)]);
6404
6405        let _ = shutdown_tx.send(());
6406    }
6407
6408    #[tokio::test]
6409    async fn aggregate_pushdown_uses_range_reduce_for_supported_global_aggregates() {
6410        let state = MockState {
6411            kv: Arc::new(Mutex::new(BTreeMap::new())),
6412            range_calls: Arc::new(AtomicUsize::new(0)),
6413            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6414            sequence_number: Arc::new(AtomicU64::new(0)),
6415        };
6416        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6417        let client = StoreClient::new(&base_url);
6418
6419        let schema = KvSchema::new(client)
6420            .table(
6421                "orders",
6422                vec![
6423                    TableColumnConfig::new("id", DataType::Int64, false),
6424                    TableColumnConfig::new("status", DataType::Utf8, false),
6425                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
6426                ],
6427                vec!["id".to_string()],
6428                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6429                    .expect("valid")
6430                    .with_cover_columns(vec!["amount_cents".to_string()])],
6431            )
6432            .expect("schema");
6433
6434        let mut writer = schema.batch_writer();
6435        for (id, status, amount) in [
6436            (1, "open", 10),
6437            (2, "closed", 15),
6438            (3, "open", 30),
6439            (4, "closed", 40),
6440        ] {
6441            writer
6442                .insert(
6443                    "orders",
6444                    vec![
6445                        CellValue::Int64(id),
6446                        CellValue::Utf8(status.to_string()),
6447                        CellValue::Int64(amount),
6448                    ],
6449                )
6450                .expect("row");
6451        }
6452        writer.flush().await.expect("flush");
6453
6454        let ctx = SessionContext::new();
6455        schema.register_all(&ctx).expect("register");
6456
6457        state.range_calls.store(0, AtomicOrdering::SeqCst);
6458        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6459
6460        let df = ctx
6461            .sql(
6462                "SELECT COUNT(*) AS row_count, SUM(amount_cents) AS total_cents, \
6463                 AVG(amount_cents) AS avg_cents \
6464                 FROM orders WHERE status = 'open'",
6465            )
6466            .await
6467            .expect("query");
6468        let batches = df.collect().await.expect("collect");
6469
6470        assert_eq!(batches.len(), 1);
6471        let batch = &batches[0];
6472        let row_count = ScalarValue::try_from_array(batch.column(0), 0).expect("row_count scalar");
6473        let total = batch
6474            .column(1)
6475            .as_any()
6476            .downcast_ref::<Int64Array>()
6477            .expect("sum int64")
6478            .value(0);
6479        let avg = batch
6480            .column(2)
6481            .as_any()
6482            .downcast_ref::<Float64Array>()
6483            .expect("avg float64")
6484            .value(0);
6485        assert!(matches!(
6486            row_count,
6487            ScalarValue::UInt64(Some(2)) | ScalarValue::Int64(Some(2))
6488        ));
6489        assert_eq!(total, 40);
6490        assert_eq!(avg, 20.0);
6491        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6492        assert!(
6493            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
6494            "supported aggregate should use range reduction path"
6495        );
6496
6497        let _ = shutdown_tx.send(());
6498    }
6499
6500    /// Store `/v1/range` is inclusive on both ends; `id <= N` and `BETWEEN` must include the end key.
6501    #[tokio::test]
6502    async fn primary_key_inclusive_upper_bound_streaming_scan_uses_range() {
6503        let state = MockState {
6504            kv: Arc::new(Mutex::new(BTreeMap::new())),
6505            range_calls: Arc::new(AtomicUsize::new(0)),
6506            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6507            sequence_number: Arc::new(AtomicU64::new(0)),
6508        };
6509        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6510        let client = StoreClient::new(&base_url);
6511
6512        let schema = KvSchema::new(client)
6513            .table(
6514                "inc_pk",
6515                vec![
6516                    TableColumnConfig::new("id", DataType::Int64, false),
6517                    TableColumnConfig::new("amount", DataType::Int64, false),
6518                ],
6519                vec!["id".to_string()],
6520                vec![],
6521            )
6522            .expect("schema");
6523
6524        let mut writer = schema.batch_writer();
6525        for id in 1i64..=5i64 {
6526            writer
6527                .insert(
6528                    "inc_pk",
6529                    vec![CellValue::Int64(id), CellValue::Int64(id * 100)],
6530                )
6531                .expect("row");
6532        }
6533        writer.flush().await.expect("flush");
6534
6535        let ctx = SessionContext::new();
6536        schema.register_all(&ctx).expect("register");
6537
6538        state.range_calls.store(0, AtomicOrdering::SeqCst);
6539        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6540
6541        let batches = ctx
6542            .sql("SELECT id FROM inc_pk WHERE id <= 3 ORDER BY id")
6543            .await
6544            .expect("lte query")
6545            .collect()
6546            .await
6547            .expect("collect");
6548        let mut ids = Vec::new();
6549        for batch in &batches {
6550            let col = batch
6551                .column(0)
6552                .as_any()
6553                .downcast_ref::<Int64Array>()
6554                .expect("id");
6555            for i in 0..batch.num_rows() {
6556                ids.push(col.value(i));
6557            }
6558        }
6559        assert_eq!(ids, vec![1, 2, 3], "id <= 3 must include id 3");
6560        assert!(
6561            state.range_calls.load(AtomicOrdering::SeqCst) >= 1,
6562            "PK bounded scan should call range"
6563        );
6564        assert_eq!(
6565            state.range_reduce_calls.load(AtomicOrdering::SeqCst),
6566            0,
6567            "streaming scan must not use range_reduce"
6568        );
6569
6570        state.range_calls.store(0, AtomicOrdering::SeqCst);
6571        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6572
6573        let batches = ctx
6574            .sql("SELECT id FROM inc_pk WHERE id BETWEEN 2 AND 4 ORDER BY id")
6575            .await
6576            .expect("between query")
6577            .collect()
6578            .await
6579            .expect("collect");
6580        ids.clear();
6581        for batch in &batches {
6582            let col = batch
6583                .column(0)
6584                .as_any()
6585                .downcast_ref::<Int64Array>()
6586                .expect("id");
6587            for i in 0..batch.num_rows() {
6588                ids.push(col.value(i));
6589            }
6590        }
6591        assert_eq!(ids, vec![2, 3, 4], "BETWEEN must include both endpoints");
6592        assert!(state.range_calls.load(AtomicOrdering::SeqCst) >= 1);
6593        assert_eq!(state.range_reduce_calls.load(AtomicOrdering::SeqCst), 0);
6594
6595        let _ = shutdown_tx.send(());
6596    }
6597
6598    #[tokio::test]
6599    async fn primary_key_inclusive_upper_bound_scalar_aggregates_use_range_reduce() {
6600        let state = MockState {
6601            kv: Arc::new(Mutex::new(BTreeMap::new())),
6602            range_calls: Arc::new(AtomicUsize::new(0)),
6603            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6604            sequence_number: Arc::new(AtomicU64::new(0)),
6605        };
6606        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6607        let client = StoreClient::new(&base_url);
6608
6609        let schema = KvSchema::new(client)
6610            .table(
6611                "inc_pk",
6612                vec![
6613                    TableColumnConfig::new("id", DataType::Int64, false),
6614                    TableColumnConfig::new("amount", DataType::Int64, false),
6615                ],
6616                vec!["id".to_string()],
6617                vec![],
6618            )
6619            .expect("schema");
6620
6621        let mut writer = schema.batch_writer();
6622        for id in 1i64..=5i64 {
6623            writer
6624                .insert(
6625                    "inc_pk",
6626                    vec![CellValue::Int64(id), CellValue::Int64(id * 100)],
6627                )
6628                .expect("row");
6629        }
6630        writer.flush().await.expect("flush");
6631
6632        let ctx = SessionContext::new();
6633        schema.register_all(&ctx).expect("register");
6634
6635        state.range_calls.store(0, AtomicOrdering::SeqCst);
6636        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6637
6638        let batches = ctx
6639            .sql("SELECT COUNT(*) AS c, SUM(amount) AS s FROM inc_pk WHERE id <= 3")
6640            .await
6641            .expect("lte agg")
6642            .collect()
6643            .await
6644            .expect("collect");
6645        assert_eq!(batches.len(), 1);
6646        let batch = &batches[0];
6647        let c = ScalarValue::try_from_array(batch.column(0), 0).expect("count");
6648        assert!(
6649            matches!(
6650                c,
6651                ScalarValue::UInt64(Some(3)) | ScalarValue::Int64(Some(3))
6652            ),
6653            "count should include id=3"
6654        );
6655        assert_eq!(
6656            batch
6657                .column(1)
6658                .as_any()
6659                .downcast_ref::<Int64Array>()
6660                .expect("sum")
6661                .value(0),
6662            100 + 200 + 300
6663        );
6664        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6665        assert!(
6666            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
6667            "scalar aggregate on PK range should use range_reduce"
6668        );
6669
6670        state.range_calls.store(0, AtomicOrdering::SeqCst);
6671        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6672
6673        let batches = ctx
6674            .sql("SELECT SUM(amount) AS s FROM inc_pk WHERE id BETWEEN 2 AND 4")
6675            .await
6676            .expect("between agg")
6677            .collect()
6678            .await
6679            .expect("collect");
6680        assert_eq!(batches.len(), 1);
6681        let batch = &batches[0];
6682        assert_eq!(
6683            batch
6684                .column(0)
6685                .as_any()
6686                .downcast_ref::<Int64Array>()
6687                .expect("sum")
6688                .value(0),
6689            200 + 300 + 400
6690        );
6691        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6692        assert!(
6693            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
6694            "BETWEEN aggregate should use range_reduce"
6695        );
6696
6697        let _ = shutdown_tx.send(());
6698    }
6699
6700    #[tokio::test]
6701    async fn aggregate_pushdown_uses_zorder_index_with_worker_filter() {
6702        let state = MockState {
6703            kv: Arc::new(Mutex::new(BTreeMap::new())),
6704            range_calls: Arc::new(AtomicUsize::new(0)),
6705            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6706            sequence_number: Arc::new(AtomicU64::new(0)),
6707        };
6708        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6709        let client = StoreClient::new(&base_url);
6710
6711        let schema = KvSchema::new(client)
6712            .table(
6713                "points",
6714                vec![
6715                    TableColumnConfig::new("x", DataType::Int64, false),
6716                    TableColumnConfig::new("y", DataType::Int64, false),
6717                    TableColumnConfig::new("id", DataType::Int64, false),
6718                    TableColumnConfig::new("value", DataType::Int64, false),
6719                ],
6720                vec!["id".to_string()],
6721                vec![
6722                    IndexSpec::z_order("xy_z", vec!["x".to_string(), "y".to_string()])
6723                        .expect("valid")
6724                        .with_cover_columns(vec!["value".to_string()]),
6725                ],
6726            )
6727            .expect("schema");
6728
6729        let mut writer = schema.batch_writer();
6730        for (x, y, id, value) in [
6731            (0, 2, 2, 20),
6732            (1, 1, 11, 110),
6733            (1, 2, 12, 120),
6734            (2, 1, 21, 210),
6735            (2, 2, 22, 220),
6736            (3, 0, 30, 300),
6737        ] {
6738            writer
6739                .insert(
6740                    "points",
6741                    vec![
6742                        CellValue::Int64(x),
6743                        CellValue::Int64(y),
6744                        CellValue::Int64(id),
6745                        CellValue::Int64(value),
6746                    ],
6747                )
6748                .expect("row");
6749        }
6750        writer.flush().await.expect("flush");
6751
6752        let ctx = SessionContext::new();
6753        schema.register_all(&ctx).expect("register");
6754
6755        state.range_calls.store(0, AtomicOrdering::SeqCst);
6756        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6757
6758        let batches = ctx
6759            .sql(
6760                "SELECT COUNT(*) AS row_count, SUM(value) AS total_value \
6761                 FROM points \
6762                 WHERE x >= 1 AND x <= 2 AND y >= 1 AND y <= 2",
6763            )
6764            .await
6765            .expect("query")
6766            .collect()
6767            .await
6768            .expect("collect");
6769
6770        assert_eq!(batches.len(), 1);
6771        let batch = &batches[0];
6772        let row_count = ScalarValue::try_from_array(batch.column(0), 0).expect("row_count scalar");
6773        let total = batch
6774            .column(1)
6775            .as_any()
6776            .downcast_ref::<Int64Array>()
6777            .expect("sum int64")
6778            .value(0);
6779        assert!(matches!(
6780            row_count,
6781            ScalarValue::UInt64(Some(4)) | ScalarValue::Int64(Some(4))
6782        ));
6783        assert_eq!(total, 660);
6784        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6785        assert!(
6786            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
6787            "z-order aggregate should use range reduction path"
6788        );
6789
6790        let _ = shutdown_tx.send(());
6791    }
6792
6793    #[tokio::test]
6794    async fn aggregate_pushdown_avg_merges_sum_and_count_across_multiple_ranges() {
6795        let state = MockState {
6796            kv: Arc::new(Mutex::new(BTreeMap::new())),
6797            range_calls: Arc::new(AtomicUsize::new(0)),
6798            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6799            sequence_number: Arc::new(AtomicU64::new(0)),
6800        };
6801        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6802        let client = StoreClient::new(&base_url);
6803
6804        let schema = KvSchema::new(client)
6805            .table(
6806                "orders",
6807                vec![
6808                    TableColumnConfig::new("id", DataType::Int64, false),
6809                    TableColumnConfig::new("status", DataType::Utf8, false),
6810                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
6811                ],
6812                vec!["id".to_string()],
6813                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6814                    .expect("valid")
6815                    .with_cover_columns(vec!["amount_cents".to_string()])],
6816            )
6817            .expect("schema");
6818
6819        let mut writer = schema.batch_writer();
6820        for (id, status, amount) in [
6821            (1, "open", 10),
6822            (2, "open", 20),
6823            (3, "closed", 100),
6824            (4, "pending", 1_000),
6825        ] {
6826            writer
6827                .insert(
6828                    "orders",
6829                    vec![
6830                        CellValue::Int64(id),
6831                        CellValue::Utf8(status.to_string()),
6832                        CellValue::Int64(amount),
6833                    ],
6834                )
6835                .expect("row");
6836        }
6837        writer.flush().await.expect("flush");
6838
6839        let ctx = SessionContext::new();
6840        schema.register_all(&ctx).expect("register");
6841
6842        state.range_calls.store(0, AtomicOrdering::SeqCst);
6843        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6844
6845        let batches = ctx
6846            .sql(
6847                "SELECT AVG(amount_cents) AS avg_cents \
6848                 FROM orders \
6849                 WHERE status IN ('open', 'closed')",
6850            )
6851            .await
6852            .expect("query")
6853            .collect()
6854            .await
6855            .expect("collect");
6856
6857        assert_eq!(batches.len(), 1);
6858        let batch = &batches[0];
6859        let avg = batch
6860            .column(0)
6861            .as_any()
6862            .downcast_ref::<Float64Array>()
6863            .expect("avg float64")
6864            .value(0);
6865        let expected = 130.0 / 3.0;
6866        assert!(
6867            (avg - expected).abs() < 1e-12,
6868            "AVG should merge SUM+COUNT across unequal-count ranges: got {avg}, expected {expected}"
6869        );
6870        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6871        assert_eq!(
6872            state.range_reduce_calls.load(AtomicOrdering::SeqCst),
6873            2,
6874            "status IN (...) should expand to two pushed reduction ranges"
6875        );
6876
6877        let _ = shutdown_tx.send(());
6878    }
6879
6880    #[tokio::test]
6881    async fn aggregate_pushdown_supports_filtered_global_aggregates() {
6882        let state = MockState {
6883            kv: Arc::new(Mutex::new(BTreeMap::new())),
6884            range_calls: Arc::new(AtomicUsize::new(0)),
6885            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6886            sequence_number: Arc::new(AtomicU64::new(0)),
6887        };
6888        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6889        let client = StoreClient::new(&base_url);
6890
6891        let schema = KvSchema::new(client)
6892            .table(
6893                "orders",
6894                vec![
6895                    TableColumnConfig::new("id", DataType::Int64, false),
6896                    TableColumnConfig::new("status", DataType::Utf8, false),
6897                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
6898                ],
6899                vec!["id".to_string()],
6900                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6901                    .expect("valid")
6902                    .with_cover_columns(vec!["amount_cents".to_string()])],
6903            )
6904            .expect("schema");
6905
6906        let mut writer = schema.batch_writer();
6907        for (id, status, amount) in [
6908            (1, "open", 10),
6909            (2, "closed", 15),
6910            (3, "open", 30),
6911            (4, "closed", 40),
6912        ] {
6913            writer
6914                .insert(
6915                    "orders",
6916                    vec![
6917                        CellValue::Int64(id),
6918                        CellValue::Utf8(status.to_string()),
6919                        CellValue::Int64(amount),
6920                    ],
6921                )
6922                .expect("row");
6923        }
6924        writer.flush().await.expect("flush");
6925
6926        let ctx = SessionContext::new();
6927        schema.register_all(&ctx).expect("register");
6928
6929        state.range_calls.store(0, AtomicOrdering::SeqCst);
6930        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
6931
6932        let query = "SELECT COUNT(*) FILTER (WHERE status = 'open') AS open_count, \
6933                            COUNT(*) FILTER (WHERE status = 'closed') AS closed_count, \
6934                            AVG(amount_cents) FILTER (WHERE status = 'closed') AS closed_avg \
6935                     FROM orders";
6936        let batches = ctx
6937            .sql(query)
6938            .await
6939            .expect("query")
6940            .collect()
6941            .await
6942            .expect("collect");
6943
6944        assert_eq!(batches.len(), 1);
6945        let batch = &batches[0];
6946        assert_count_scalar(batch, 0, 0, 2);
6947        assert_count_scalar(batch, 1, 0, 2);
6948        let closed_avg = batch
6949            .column(2)
6950            .as_any()
6951            .downcast_ref::<Float64Array>()
6952            .expect("avg float64")
6953            .value(0);
6954        assert_eq!(closed_avg, 27.5);
6955        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
6956        assert!(
6957            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 3,
6958            "filtered aggregate pushdown should use dedicated reduction jobs"
6959        );
6960
6961        let _ = shutdown_tx.send(());
6962    }
6963
6964    #[tokio::test]
6965    async fn aggregate_pushdown_supports_case_filtered_global_aggregates() {
6966        let state = MockState {
6967            kv: Arc::new(Mutex::new(BTreeMap::new())),
6968            range_calls: Arc::new(AtomicUsize::new(0)),
6969            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
6970            sequence_number: Arc::new(AtomicU64::new(0)),
6971        };
6972        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
6973        let client = StoreClient::new(&base_url);
6974
6975        let schema = KvSchema::new(client)
6976            .table(
6977                "orders",
6978                vec![
6979                    TableColumnConfig::new("id", DataType::Int64, false),
6980                    TableColumnConfig::new("status", DataType::Utf8, false),
6981                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
6982                ],
6983                vec!["id".to_string()],
6984                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
6985                    .expect("valid")
6986                    .with_cover_columns(vec!["amount_cents".to_string()])],
6987            )
6988            .expect("schema");
6989
6990        let mut writer = schema.batch_writer();
6991        for (id, status, amount) in [
6992            (1, "open", 10),
6993            (2, "closed", 15),
6994            (3, "open", 30),
6995            (4, "closed", 40),
6996        ] {
6997            writer
6998                .insert(
6999                    "orders",
7000                    vec![
7001                        CellValue::Int64(id),
7002                        CellValue::Utf8(status.to_string()),
7003                        CellValue::Int64(amount),
7004                    ],
7005                )
7006                .expect("row");
7007        }
7008        writer.flush().await.expect("flush");
7009
7010        let ctx = SessionContext::new();
7011        schema.register_all(&ctx).expect("register");
7012
7013        state.range_calls.store(0, AtomicOrdering::SeqCst);
7014        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7015
7016        let query = "SELECT SUM(CASE status WHEN 'open' THEN amount_cents END) AS open_total, \
7017                            COUNT(CASE status WHEN 'closed' THEN 1 END) AS closed_count, \
7018                            AVG(CASE WHEN status = 'closed' THEN amount_cents END) AS closed_avg \
7019                     FROM orders";
7020        let batches = ctx
7021            .sql(query)
7022            .await
7023            .expect("query")
7024            .collect()
7025            .await
7026            .expect("collect");
7027
7028        assert_eq!(batches.len(), 1);
7029        let batch = &batches[0];
7030        assert_eq!(
7031            ScalarValue::try_from_array(batch.column(0), 0).expect("sum scalar"),
7032            ScalarValue::Int64(Some(40))
7033        );
7034        assert_count_scalar(batch, 1, 0, 2);
7035        let closed_avg = batch
7036            .column(2)
7037            .as_any()
7038            .downcast_ref::<Float64Array>()
7039            .expect("avg float64")
7040            .value(0);
7041        assert_eq!(closed_avg, 27.5);
7042        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7043        assert!(
7044            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 3,
7045            "case-based conditional aggregates should use reduction jobs"
7046        );
7047
7048        let _ = shutdown_tx.send(());
7049    }
7050
7051    #[tokio::test]
7052    async fn aggregate_pushdown_supports_casted_group_and_aggregate_expressions() {
7053        let state = MockState {
7054            kv: Arc::new(Mutex::new(BTreeMap::new())),
7055            range_calls: Arc::new(AtomicUsize::new(0)),
7056            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7057            sequence_number: Arc::new(AtomicU64::new(0)),
7058        };
7059        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7060        let client = StoreClient::new(&base_url);
7061
7062        let schema = KvSchema::new(client)
7063            .table(
7064                "orders",
7065                vec![
7066                    TableColumnConfig::new("id", DataType::Int64, false),
7067                    TableColumnConfig::new("status", DataType::Utf8, false),
7068                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
7069                ],
7070                vec!["id".to_string()],
7071                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7072                    .expect("valid")
7073                    .with_cover_columns(vec!["amount_cents".to_string()])],
7074            )
7075            .expect("schema");
7076
7077        let mut writer = schema.batch_writer();
7078        for (id, status, amount) in [
7079            (1, "open", 10),
7080            (2, "open", 30),
7081            (3, "closed", 15),
7082            (4, "closed", 40),
7083        ] {
7084            writer
7085                .insert(
7086                    "orders",
7087                    vec![
7088                        CellValue::Int64(id),
7089                        CellValue::Utf8(status.to_string()),
7090                        CellValue::Int64(amount),
7091                    ],
7092                )
7093                .expect("row");
7094        }
7095        writer.flush().await.expect("flush");
7096
7097        let ctx = SessionContext::new();
7098        schema.register_all(&ctx).expect("register");
7099
7100        state.range_calls.store(0, AtomicOrdering::SeqCst);
7101        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7102
7103        let batches = ctx
7104            .sql(
7105                "SELECT CAST(status AS VARCHAR) AS status_text, \
7106                        SUM(CAST(amount_cents AS DOUBLE)) AS total_cents \
7107                 FROM orders \
7108                 GROUP BY CAST(status AS VARCHAR) \
7109                 ORDER BY status_text",
7110            )
7111            .await
7112            .expect("query")
7113            .collect()
7114            .await
7115            .expect("collect");
7116
7117        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 2);
7118        let batch = &batches[0];
7119        let status = ScalarValue::try_from_array(batch.column(0), 0).expect("status scalar");
7120        assert_eq!(scalar_to_string(&status).as_deref(), Some("closed"));
7121        let closed_total = batch
7122            .column(1)
7123            .as_any()
7124            .downcast_ref::<Float64Array>()
7125            .expect("sum float64")
7126            .value(0);
7127        let open_total = batch
7128            .column(1)
7129            .as_any()
7130            .downcast_ref::<Float64Array>()
7131            .expect("sum float64")
7132            .value(1);
7133        assert_eq!(closed_total, 55.0);
7134        assert_eq!(open_total, 40.0);
7135        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7136        assert!(
7137            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
7138            "casted grouped aggregates should stay on the reduction path"
7139        );
7140
7141        let _ = shutdown_tx.send(());
7142    }
7143
7144    #[tokio::test]
7145    async fn aggregate_pushdown_supports_computed_aggregate_inputs() {
7146        let state = MockState {
7147            kv: Arc::new(Mutex::new(BTreeMap::new())),
7148            range_calls: Arc::new(AtomicUsize::new(0)),
7149            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7150            sequence_number: Arc::new(AtomicU64::new(0)),
7151        };
7152        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7153        let client = StoreClient::new(&base_url);
7154
7155        let schema = KvSchema::new(client)
7156            .table(
7157                "orders",
7158                vec![
7159                    TableColumnConfig::new("id", DataType::Int64, false),
7160                    TableColumnConfig::new("price_cents", DataType::Int64, false),
7161                    TableColumnConfig::new("qty", DataType::Int64, false),
7162                    TableColumnConfig::new("duration_ms", DataType::Int64, false),
7163                ],
7164                vec!["id".to_string()],
7165                vec![],
7166            )
7167            .expect("schema");
7168
7169        let mut writer = schema.batch_writer();
7170        for (id, price, qty, duration_ms) in [(1, 10, 2, 500), (2, 15, 3, 2500), (3, 7, 4, 1000)] {
7171            writer
7172                .insert(
7173                    "orders",
7174                    vec![
7175                        CellValue::Int64(id),
7176                        CellValue::Int64(price),
7177                        CellValue::Int64(qty),
7178                        CellValue::Int64(duration_ms),
7179                    ],
7180                )
7181                .expect("row");
7182        }
7183        writer.flush().await.expect("flush");
7184
7185        let ctx = SessionContext::new();
7186        schema.register_all(&ctx).expect("register");
7187
7188        state.range_calls.store(0, AtomicOrdering::SeqCst);
7189        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7190
7191        let batches = ctx
7192            .sql(
7193                "SELECT SUM(price_cents * qty) AS total_revenue, \
7194                        AVG(duration_ms / 1e3) AS avg_seconds \
7195                 FROM orders",
7196            )
7197            .await
7198            .expect("query")
7199            .collect()
7200            .await
7201            .expect("collect");
7202
7203        assert_eq!(batches.len(), 1);
7204        let batch = &batches[0];
7205        assert_eq!(
7206            ScalarValue::try_from_array(batch.column(0), 0).expect("sum scalar"),
7207            ScalarValue::Int64(Some(93))
7208        );
7209        let avg_seconds = batch
7210            .column(1)
7211            .as_any()
7212            .downcast_ref::<Float64Array>()
7213            .expect("avg float64")
7214            .value(0);
7215        assert!((avg_seconds - (4.0 / 3.0)).abs() < 1e-12);
7216        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7217        assert!(
7218            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 2,
7219            "computed aggregate inputs should use reduction jobs"
7220        );
7221
7222        let _ = shutdown_tx.send(());
7223    }
7224
7225    #[tokio::test]
7226    async fn aggregate_pushdown_supports_add_and_subtract_inputs() {
7227        let state = MockState {
7228            kv: Arc::new(Mutex::new(BTreeMap::new())),
7229            range_calls: Arc::new(AtomicUsize::new(0)),
7230            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7231            sequence_number: Arc::new(AtomicU64::new(0)),
7232        };
7233        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7234        let client = StoreClient::new(&base_url);
7235
7236        let schema = KvSchema::new(client)
7237            .table(
7238                "orders",
7239                vec![
7240                    TableColumnConfig::new("id", DataType::Int64, false),
7241                    TableColumnConfig::new("price_cents", DataType::Int64, false),
7242                    TableColumnConfig::new("fee_cents", DataType::Int64, false),
7243                    TableColumnConfig::new("discount_cents", DataType::Int64, false),
7244                ],
7245                vec!["id".to_string()],
7246                vec![],
7247            )
7248            .expect("schema");
7249
7250        let mut writer = schema.batch_writer();
7251        for (id, price, fee, discount) in [(1, 10, 2, 1), (2, 15, 3, 4), (3, 7, 1, 2)] {
7252            writer
7253                .insert(
7254                    "orders",
7255                    vec![
7256                        CellValue::Int64(id),
7257                        CellValue::Int64(price),
7258                        CellValue::Int64(fee),
7259                        CellValue::Int64(discount),
7260                    ],
7261                )
7262                .expect("row");
7263        }
7264        writer.flush().await.expect("flush");
7265
7266        let ctx = SessionContext::new();
7267        schema.register_all(&ctx).expect("register");
7268
7269        state.range_calls.store(0, AtomicOrdering::SeqCst);
7270        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7271
7272        let batches = ctx
7273            .sql(
7274                "SELECT SUM(price_cents + fee_cents) AS gross_plus_fee, \
7275                        SUM(price_cents - discount_cents) AS net_total \
7276                 FROM orders",
7277            )
7278            .await
7279            .expect("query")
7280            .collect()
7281            .await
7282            .expect("collect");
7283
7284        assert_eq!(batches.len(), 1);
7285        let batch = &batches[0];
7286        assert_eq!(
7287            ScalarValue::try_from_array(batch.column(0), 0).expect("sum scalar"),
7288            ScalarValue::Int64(Some(38))
7289        );
7290        assert_eq!(
7291            ScalarValue::try_from_array(batch.column(1), 0).expect("sum scalar"),
7292            ScalarValue::Int64(Some(25))
7293        );
7294        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7295        assert!(
7296            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 2,
7297            "add/sub aggregate inputs should use reduction jobs"
7298        );
7299
7300        let _ = shutdown_tx.send(());
7301    }
7302
7303    #[tokio::test]
7304    async fn aggregate_pushdown_supports_case_filtered_computed_aggregates() {
7305        let state = MockState {
7306            kv: Arc::new(Mutex::new(BTreeMap::new())),
7307            range_calls: Arc::new(AtomicUsize::new(0)),
7308            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7309            sequence_number: Arc::new(AtomicU64::new(0)),
7310        };
7311        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7312        let client = StoreClient::new(&base_url);
7313
7314        let schema = KvSchema::new(client)
7315            .table(
7316                "orders",
7317                vec![
7318                    TableColumnConfig::new("id", DataType::Int64, false),
7319                    TableColumnConfig::new("status", DataType::Utf8, false),
7320                    TableColumnConfig::new("price_cents", DataType::Int64, false),
7321                    TableColumnConfig::new("qty", DataType::Int64, false),
7322                ],
7323                vec!["id".to_string()],
7324                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7325                    .expect("valid")
7326                    .with_cover_columns(vec!["price_cents".to_string(), "qty".to_string()])],
7327            )
7328            .expect("schema");
7329
7330        let mut writer = schema.batch_writer();
7331        for (id, status, price, qty) in [
7332            (1, "open", 10, 2),
7333            (2, "closed", 99, 1),
7334            (3, "open", 15, 3),
7335            (4, "closed", 7, 4),
7336        ] {
7337            writer
7338                .insert(
7339                    "orders",
7340                    vec![
7341                        CellValue::Int64(id),
7342                        CellValue::Utf8(status.to_string()),
7343                        CellValue::Int64(price),
7344                        CellValue::Int64(qty),
7345                    ],
7346                )
7347                .expect("row");
7348        }
7349        writer.flush().await.expect("flush");
7350
7351        let ctx = SessionContext::new();
7352        schema.register_all(&ctx).expect("register");
7353
7354        state.range_calls.store(0, AtomicOrdering::SeqCst);
7355        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7356
7357        let batches = ctx
7358            .sql(
7359                "SELECT SUM(CASE WHEN status = 'open' THEN price_cents * qty END) \
7360                 AS open_revenue \
7361                 FROM orders",
7362            )
7363            .await
7364            .expect("query")
7365            .collect()
7366            .await
7367            .expect("collect");
7368
7369        assert_eq!(batches.len(), 1);
7370        let batch = &batches[0];
7371        assert_eq!(
7372            ScalarValue::try_from_array(batch.column(0), 0).expect("sum scalar"),
7373            ScalarValue::Int64(Some(65))
7374        );
7375        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7376        assert!(
7377            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
7378            "case-filtered computed aggregate should use reduction jobs"
7379        );
7380
7381        let _ = shutdown_tx.send(());
7382    }
7383
7384    #[tokio::test]
7385    async fn aggregate_pushdown_does_not_rewrite_sum_case_else_zero_semantics() {
7386        let state = MockState {
7387            kv: Arc::new(Mutex::new(BTreeMap::new())),
7388            range_calls: Arc::new(AtomicUsize::new(0)),
7389            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7390            sequence_number: Arc::new(AtomicU64::new(0)),
7391        };
7392        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7393        let client = StoreClient::new(&base_url);
7394
7395        let schema = KvSchema::new(client)
7396            .table(
7397                "orders",
7398                vec![
7399                    TableColumnConfig::new("id", DataType::Int64, false),
7400                    TableColumnConfig::new("region", DataType::Utf8, false),
7401                    TableColumnConfig::new("status", DataType::Utf8, false),
7402                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
7403                ],
7404                vec!["id".to_string()],
7405                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7406                    .expect("valid")
7407                    .with_cover_columns(vec!["region".to_string(), "amount_cents".to_string()])],
7408            )
7409            .expect("schema");
7410
7411        let mut writer = schema.batch_writer();
7412        for (id, region, status, amount) in [
7413            (1, "east", "open", 10),
7414            (2, "east", "closed", 20),
7415            (3, "west", "closed", 30),
7416        ] {
7417            writer
7418                .insert(
7419                    "orders",
7420                    vec![
7421                        CellValue::Int64(id),
7422                        CellValue::Utf8(region.to_string()),
7423                        CellValue::Utf8(status.to_string()),
7424                        CellValue::Int64(amount),
7425                    ],
7426                )
7427                .expect("row");
7428        }
7429        writer.flush().await.expect("flush");
7430
7431        let ctx = SessionContext::new();
7432        schema.register_all(&ctx).expect("register");
7433
7434        state.range_calls.store(0, AtomicOrdering::SeqCst);
7435        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7436
7437        let batches = ctx
7438            .sql(
7439                "SELECT region, \
7440                        SUM(CASE WHEN status = 'open' THEN amount_cents ELSE 0 END) AS open_total \
7441                 FROM orders \
7442                 GROUP BY region \
7443                 ORDER BY region",
7444            )
7445            .await
7446            .expect("query")
7447            .collect()
7448            .await
7449            .expect("collect");
7450
7451        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 2);
7452        let batch = &batches[0];
7453        assert_eq!(
7454            ScalarValue::try_from_array(batch.column(0), 0).expect("region scalar"),
7455            ScalarValue::Utf8(Some("east".to_string()))
7456        );
7457        assert_eq!(
7458            ScalarValue::try_from_array(batch.column(1), 0).expect("sum scalar"),
7459            ScalarValue::Int64(Some(10))
7460        );
7461        assert_eq!(
7462            ScalarValue::try_from_array(batch.column(0), 1).expect("region scalar"),
7463            ScalarValue::Utf8(Some("west".to_string()))
7464        );
7465        assert_eq!(
7466            ScalarValue::try_from_array(batch.column(1), 1).expect("sum scalar"),
7467            ScalarValue::Int64(Some(0))
7468        );
7469        assert_eq!(
7470            state.range_reduce_calls.load(AtomicOrdering::SeqCst),
7471            0,
7472            "SUM(CASE ... ELSE 0 END) must not push down because FILTER rewrite changes semantics"
7473        );
7474
7475        let _ = shutdown_tx.send(());
7476    }
7477
7478    #[tokio::test]
7479    async fn aggregate_pushdown_supports_computed_group_keys() {
7480        let state = MockState {
7481            kv: Arc::new(Mutex::new(BTreeMap::new())),
7482            range_calls: Arc::new(AtomicUsize::new(0)),
7483            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7484            sequence_number: Arc::new(AtomicU64::new(0)),
7485        };
7486        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7487        let client = StoreClient::new(&base_url);
7488
7489        let schema = KvSchema::new(client)
7490            .table(
7491                "events",
7492                vec![
7493                    TableColumnConfig::new("id", DataType::Int64, false),
7494                    TableColumnConfig::new("country", DataType::Utf8, false),
7495                    TableColumnConfig::new(
7496                        "occurred_at",
7497                        DataType::Timestamp(TimeUnit::Microsecond, None),
7498                        false,
7499                    ),
7500                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
7501                ],
7502                vec!["id".to_string()],
7503                vec![],
7504            )
7505            .expect("schema");
7506
7507        let day_micros = 86_400_000_000i64;
7508        let day0 = 1_700_000_000_000_000i64;
7509        let day1 = day0 + day_micros;
7510        let day0_bucket = day0.div_euclid(day_micros) * day_micros;
7511        let day1_bucket = day1.div_euclid(day_micros) * day_micros;
7512        let mut writer = schema.batch_writer();
7513        for (id, country, occurred_at, amount) in [
7514            (1, "East", day0 + 111, 10),
7515            (2, "east", day0 + 222, 30),
7516            (3, "West", day1 + 333, 7),
7517        ] {
7518            writer
7519                .insert(
7520                    "events",
7521                    vec![
7522                        CellValue::Int64(id),
7523                        CellValue::Utf8(country.to_string()),
7524                        CellValue::Timestamp(occurred_at),
7525                        CellValue::Int64(amount),
7526                    ],
7527                )
7528                .expect("row");
7529        }
7530        writer.flush().await.expect("flush");
7531
7532        let ctx = SessionContext::new();
7533        schema.register_all(&ctx).expect("register");
7534
7535        state.range_calls.store(0, AtomicOrdering::SeqCst);
7536        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7537
7538        let batches = ctx
7539            .sql(
7540                "SELECT lower(country) AS country_norm, \
7541                        date_trunc('day', occurred_at) AS day_bucket, \
7542                        SUM(amount_cents) AS total_cents \
7543                 FROM events \
7544                 GROUP BY lower(country), date_trunc('day', occurred_at) \
7545                 ORDER BY country_norm, day_bucket",
7546            )
7547            .await
7548            .expect("query")
7549            .collect()
7550            .await
7551            .expect("collect");
7552
7553        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 2);
7554        let batch = &batches[0];
7555        assert_eq!(
7556            scalar_to_string(
7557                &ScalarValue::try_from_array(batch.column(0), 0).expect("country scalar")
7558            )
7559            .as_deref(),
7560            Some("east")
7561        );
7562        assert_eq!(
7563            ScalarValue::try_from_array(batch.column(1), 0).expect("day scalar"),
7564            ScalarValue::TimestampMicrosecond(Some(day0_bucket), None)
7565        );
7566        assert_eq!(
7567            ScalarValue::try_from_array(batch.column(2), 0).expect("sum scalar"),
7568            ScalarValue::Int64(Some(40))
7569        );
7570        assert_eq!(
7571            scalar_to_string(
7572                &ScalarValue::try_from_array(batch.column(0), 1).expect("country scalar")
7573            )
7574            .as_deref(),
7575            Some("west")
7576        );
7577        assert_eq!(
7578            ScalarValue::try_from_array(batch.column(1), 1).expect("day scalar"),
7579            ScalarValue::TimestampMicrosecond(Some(day1_bucket), None)
7580        );
7581        assert_eq!(
7582            ScalarValue::try_from_array(batch.column(2), 1).expect("sum scalar"),
7583            ScalarValue::Int64(Some(7))
7584        );
7585        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7586        assert!(
7587            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
7588            "computed group keys should use grouped reduction path"
7589        );
7590
7591        let _ = shutdown_tx.send(());
7592    }
7593
7594    #[tokio::test]
7595    async fn aggregate_pushdown_supports_group_by_queries() {
7596        let state = MockState {
7597            kv: Arc::new(Mutex::new(BTreeMap::new())),
7598            range_calls: Arc::new(AtomicUsize::new(0)),
7599            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7600            sequence_number: Arc::new(AtomicU64::new(0)),
7601        };
7602        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7603        let client = StoreClient::new(&base_url);
7604
7605        let schema = KvSchema::new(client)
7606            .table(
7607                "orders",
7608                vec![
7609                    TableColumnConfig::new("id", DataType::Int64, false),
7610                    TableColumnConfig::new("status", DataType::Utf8, false),
7611                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
7612                ],
7613                vec!["id".to_string()],
7614                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7615                    .expect("valid")
7616                    .with_cover_columns(vec!["amount_cents".to_string()])],
7617            )
7618            .expect("schema");
7619
7620        let mut writer = schema.batch_writer();
7621        for (id, status, amount) in [
7622            (1, "open", 10),
7623            (2, "open", 30),
7624            (3, "closed", 15),
7625            (4, "closed", 40),
7626        ] {
7627            writer
7628                .insert(
7629                    "orders",
7630                    vec![
7631                        CellValue::Int64(id),
7632                        CellValue::Utf8(status.to_string()),
7633                        CellValue::Int64(amount),
7634                    ],
7635                )
7636                .expect("row");
7637        }
7638        writer.flush().await.expect("flush");
7639
7640        let ctx = SessionContext::new();
7641        schema.register_all(&ctx).expect("register");
7642
7643        state.range_calls.store(0, AtomicOrdering::SeqCst);
7644        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7645
7646        let batches = ctx
7647            .sql(
7648                "SELECT status, COUNT(*) AS row_count, SUM(amount_cents) AS total_cents \
7649                 FROM orders GROUP BY status ORDER BY status",
7650            )
7651            .await
7652            .expect("query")
7653            .collect()
7654            .await
7655            .expect("collect");
7656
7657        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 2);
7658        let batch = &batches[0];
7659        assert_eq!(
7660            ScalarValue::try_from_array(batch.column(0), 0).expect("status scalar"),
7661            ScalarValue::Utf8(Some("closed".to_string()))
7662        );
7663        assert_count_scalar(batch, 1, 0, 2);
7664        assert_eq!(
7665            ScalarValue::try_from_array(batch.column(2), 0).expect("sum scalar"),
7666            ScalarValue::Int64(Some(55))
7667        );
7668        assert_eq!(
7669            ScalarValue::try_from_array(batch.column(0), 1).expect("status scalar"),
7670            ScalarValue::Utf8(Some("open".to_string()))
7671        );
7672        assert_count_scalar(batch, 1, 1, 2);
7673        assert_eq!(
7674            ScalarValue::try_from_array(batch.column(2), 1).expect("sum scalar"),
7675            ScalarValue::Int64(Some(40))
7676        );
7677        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7678        assert!(
7679            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 2,
7680            "group-by aggregate should use grouped range reduction path"
7681        );
7682
7683        let _ = shutdown_tx.send(());
7684    }
7685
7686    #[tokio::test]
7687    async fn aggregate_pushdown_group_by_float_canonicalizes_signed_zero() {
7688        let state = MockState {
7689            kv: Arc::new(Mutex::new(BTreeMap::new())),
7690            range_calls: Arc::new(AtomicUsize::new(0)),
7691            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7692            sequence_number: Arc::new(AtomicU64::new(0)),
7693        };
7694        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7695        let client = StoreClient::new(&base_url);
7696
7697        let schema = KvSchema::new(client)
7698            .table(
7699                "metrics",
7700                vec![
7701                    TableColumnConfig::new("id", DataType::Int64, false),
7702                    TableColumnConfig::new("score", DataType::Float64, false),
7703                ],
7704                vec!["id".to_string()],
7705                vec![],
7706            )
7707            .expect("schema");
7708
7709        let mut writer = schema.batch_writer();
7710        for (id, score) in [(1, -0.0), (2, 0.0), (3, 1.5)] {
7711            writer
7712                .insert(
7713                    "metrics",
7714                    vec![CellValue::Int64(id), CellValue::Float64(score)],
7715                )
7716                .expect("row");
7717        }
7718        writer.flush().await.expect("flush");
7719
7720        let ctx = SessionContext::new();
7721        schema.register_all(&ctx).expect("register");
7722
7723        state.range_calls.store(0, AtomicOrdering::SeqCst);
7724        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7725
7726        let batches = ctx
7727            .sql(
7728                "SELECT score, COUNT(*) AS row_count \
7729                 FROM metrics GROUP BY score ORDER BY row_count DESC, score",
7730            )
7731            .await
7732            .expect("query")
7733            .collect()
7734            .await
7735            .expect("collect");
7736
7737        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 2);
7738        let batch = &batches[0];
7739        let top_score = batch
7740            .column(0)
7741            .as_any()
7742            .downcast_ref::<Float64Array>()
7743            .expect("score float64")
7744            .value(0);
7745        assert_eq!(top_score.to_bits(), 0.0f64.to_bits());
7746        assert_count_scalar(batch, 1, 0, 2);
7747        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7748        assert!(
7749            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 1,
7750            "float group-by aggregate should stay on grouped reduction path"
7751        );
7752
7753        let _ = shutdown_tx.send(());
7754    }
7755
7756    #[tokio::test]
7757    async fn aggregate_pushdown_supports_filtered_group_by_queries() {
7758        let state = MockState {
7759            kv: Arc::new(Mutex::new(BTreeMap::new())),
7760            range_calls: Arc::new(AtomicUsize::new(0)),
7761            range_reduce_calls: Arc::new(AtomicUsize::new(0)),
7762            sequence_number: Arc::new(AtomicU64::new(0)),
7763        };
7764        let (base_url, shutdown_tx) = spawn_mock_server(state.clone()).await;
7765        let client = StoreClient::new(&base_url);
7766
7767        let schema = KvSchema::new(client)
7768            .table(
7769                "orders",
7770                vec![
7771                    TableColumnConfig::new("id", DataType::Int64, false),
7772                    TableColumnConfig::new("region", DataType::Utf8, false),
7773                    TableColumnConfig::new("status", DataType::Utf8, false),
7774                    TableColumnConfig::new("amount_cents", DataType::Int64, false),
7775                ],
7776                vec!["id".to_string()],
7777                vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7778                    .expect("valid")
7779                    .with_cover_columns(vec!["region".to_string(), "amount_cents".to_string()])],
7780            )
7781            .expect("schema");
7782
7783        let mut writer = schema.batch_writer();
7784        for (id, region, status, amount) in [
7785            (1, "east", "open", 10),
7786            (2, "east", "closed", 20),
7787            (3, "west", "open", 30),
7788            (4, "north", "closed", 40),
7789        ] {
7790            writer
7791                .insert(
7792                    "orders",
7793                    vec![
7794                        CellValue::Int64(id),
7795                        CellValue::Utf8(region.to_string()),
7796                        CellValue::Utf8(status.to_string()),
7797                        CellValue::Int64(amount),
7798                    ],
7799                )
7800                .expect("row");
7801        }
7802        writer.flush().await.expect("flush");
7803
7804        let ctx = SessionContext::new();
7805        schema.register_all(&ctx).expect("register");
7806
7807        state.range_calls.store(0, AtomicOrdering::SeqCst);
7808        state.range_reduce_calls.store(0, AtomicOrdering::SeqCst);
7809
7810        let batches = ctx
7811            .sql(
7812                "SELECT region, \
7813                        COUNT(*) FILTER (WHERE status = 'open') AS open_count, \
7814                        SUM(amount_cents) FILTER (WHERE status = 'closed') AS closed_total \
7815                 FROM orders \
7816                 GROUP BY region \
7817                 ORDER BY region",
7818            )
7819            .await
7820            .expect("query")
7821            .collect()
7822            .await
7823            .expect("collect");
7824
7825        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 3);
7826        let batch = &batches[0];
7827
7828        assert_eq!(
7829            ScalarValue::try_from_array(batch.column(0), 0).expect("region scalar"),
7830            ScalarValue::Utf8(Some("east".to_string()))
7831        );
7832        assert_count_scalar(batch, 1, 0, 1);
7833        assert_eq!(
7834            ScalarValue::try_from_array(batch.column(2), 0).expect("sum scalar"),
7835            ScalarValue::Int64(Some(20))
7836        );
7837
7838        assert_eq!(
7839            ScalarValue::try_from_array(batch.column(0), 1).expect("region scalar"),
7840            ScalarValue::Utf8(Some("north".to_string()))
7841        );
7842        assert_count_scalar(batch, 1, 1, 0);
7843        assert_eq!(
7844            ScalarValue::try_from_array(batch.column(2), 1).expect("sum scalar"),
7845            ScalarValue::Int64(Some(40))
7846        );
7847
7848        assert_eq!(
7849            ScalarValue::try_from_array(batch.column(0), 2).expect("region scalar"),
7850            ScalarValue::Utf8(Some("west".to_string()))
7851        );
7852        assert_count_scalar(batch, 1, 2, 1);
7853        assert_eq!(
7854            ScalarValue::try_from_array(batch.column(2), 2).expect("sum scalar"),
7855            ScalarValue::Int64(None)
7856        );
7857
7858        assert_eq!(state.range_calls.load(AtomicOrdering::SeqCst), 0);
7859        assert!(
7860            state.range_reduce_calls.load(AtomicOrdering::SeqCst) >= 3,
7861            "filtered group-by aggregate should use grouped reduction plus seed job"
7862        );
7863
7864        let _ = shutdown_tx.send(());
7865    }
7866
7867    mod e2e {
7868        use super::*;
7869        use axum::{routing::get, Router};
7870        use datafusion::prelude::SessionContext;
7871        use exoware_sdk::StoreClient;
7872        use exoware_server::{connect_stack, AppState};
7873        use exoware_simulator::RocksStore;
7874        use tempfile::tempdir;
7875
7876        struct TestServers {
7877            ingest_url: String,
7878            query_url: String,
7879        }
7880
7881        impl TestServers {
7882            fn client(&self) -> StoreClient {
7883                StoreClient::builder()
7884                    .health_url(&self.query_url)
7885                    .ingest_url(&self.ingest_url)
7886                    .query_url(&self.query_url)
7887                    .compact_url(&self.ingest_url)
7888                    .stream_url(&self.query_url)
7889                    .build()
7890                    .expect("test server URLs are set")
7891            }
7892        }
7893
7894        async fn spawn_e2e_servers() -> TestServers {
7895            let dir = tempdir().expect("tempdir");
7896            let db = RocksStore::open(dir.path()).expect("db");
7897            let state = AppState::new(std::sync::Arc::new(db));
7898            let connect = connect_stack(state);
7899            let app = Router::new()
7900                .route("/health", get(|| async { "ok" }))
7901                .fallback_service(connect);
7902            let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
7903                .await
7904                .expect("bind");
7905            let url = format!("http://{}", listener.local_addr().unwrap());
7906            tokio::spawn(async move {
7907                axum::serve(listener, app).await.expect("serve");
7908            });
7909            for _ in 0..200 {
7910                if reqwest::get(format!("{url}/health"))
7911                    .await
7912                    .ok()
7913                    .is_some_and(|r| r.status().is_success())
7914                {
7915                    return TestServers {
7916                        ingest_url: url.clone(),
7917                        query_url: url,
7918                    };
7919                }
7920                tokio::time::sleep(std::time::Duration::from_millis(25)).await;
7921            }
7922            panic!("e2e simulator did not become ready");
7923        }
7924
7925        #[tokio::test]
7926        async fn sql_insert_and_select_through_real_ingest_query_workers() {
7927            let servers = spawn_e2e_servers().await;
7928            let client = servers.client();
7929
7930            let schema = KvSchema::new(client)
7931                .table(
7932                    "orders",
7933                    vec![
7934                        TableColumnConfig::new("id", DataType::Int64, false),
7935                        TableColumnConfig::new("status", DataType::Utf8, false),
7936                        TableColumnConfig::new("amount_cents", DataType::Int64, false),
7937                    ],
7938                    vec!["id".to_string()],
7939                    vec![IndexSpec::new("status_idx", vec!["status".to_string()])
7940                        .expect("valid")
7941                        .with_cover_columns(vec!["amount_cents".to_string()])],
7942                )
7943                .expect("schema");
7944
7945            let mut writer = schema.batch_writer();
7946            for (id, status, amount) in [
7947                (1i64, "open", 100i64),
7948                (2, "closed", 200),
7949                (3, "open", 300),
7950                (4, "closed", 400),
7951                (5, "open", 500),
7952            ] {
7953                writer
7954                    .insert(
7955                        "orders",
7956                        vec![
7957                            CellValue::Int64(id),
7958                            CellValue::Utf8(status.to_string()),
7959                            CellValue::Int64(amount),
7960                        ],
7961                    )
7962                    .expect("insert row");
7963            }
7964            writer.flush().await.expect("flush batch");
7965
7966            let ctx = SessionContext::new();
7967            schema.register_all(&ctx).expect("register tables");
7968
7969            let batches = ctx
7970                .sql("SELECT id, amount_cents FROM orders ORDER BY id")
7971                .await
7972                .expect("full scan query")
7973                .collect()
7974                .await
7975                .expect("collect full scan");
7976            let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
7977            assert_eq!(total_rows, 5, "all 5 rows returned from full scan");
7978
7979            let mut ids = Vec::new();
7980            let mut amounts = Vec::new();
7981            for batch in &batches {
7982                let id_col = batch
7983                    .column(0)
7984                    .as_any()
7985                    .downcast_ref::<Int64Array>()
7986                    .expect("id column");
7987                let amt_col = batch
7988                    .column(1)
7989                    .as_any()
7990                    .downcast_ref::<Int64Array>()
7991                    .expect("amount column");
7992                for i in 0..batch.num_rows() {
7993                    ids.push(id_col.value(i));
7994                    amounts.push(amt_col.value(i));
7995                }
7996            }
7997            assert_eq!(ids, vec![1, 2, 3, 4, 5]);
7998            assert_eq!(amounts, vec![100, 200, 300, 400, 500]);
7999
8000            let filtered = ctx
8001                .sql(
8002                    "SELECT id, amount_cents FROM orders \
8003                     WHERE status = 'open' ORDER BY id",
8004                )
8005                .await
8006                .expect("filtered query")
8007                .collect()
8008                .await
8009                .expect("collect filtered");
8010            let mut filtered_ids = Vec::new();
8011            let mut filtered_amounts = Vec::new();
8012            for batch in &filtered {
8013                let id_col = batch
8014                    .column(0)
8015                    .as_any()
8016                    .downcast_ref::<Int64Array>()
8017                    .expect("id column");
8018                let amt_col = batch
8019                    .column(1)
8020                    .as_any()
8021                    .downcast_ref::<Int64Array>()
8022                    .expect("amount column");
8023                for i in 0..batch.num_rows() {
8024                    filtered_ids.push(id_col.value(i));
8025                    filtered_amounts.push(amt_col.value(i));
8026                }
8027            }
8028            assert_eq!(filtered_ids, vec![1, 3, 5]);
8029            assert_eq!(filtered_amounts, vec![100, 300, 500]);
8030
8031            let agg = ctx
8032                .sql(
8033                    "SELECT COUNT(*) AS cnt, SUM(amount_cents) AS total \
8034                     FROM orders WHERE status = 'open'",
8035                )
8036                .await
8037                .expect("aggregate query")
8038                .collect()
8039                .await
8040                .expect("collect aggregate");
8041            assert_eq!(agg.len(), 1);
8042            let batch = &agg[0];
8043            assert_eq!(batch.num_rows(), 1);
8044            assert_count_scalar(batch, 0, 0, 3);
8045            let total = ScalarValue::try_from_array(batch.column(1), 0).expect("sum scalar");
8046            match total {
8047                ScalarValue::Int64(Some(v)) => assert_eq!(v, 900),
8048                other => panic!("unexpected sum type: {other:?}"),
8049            }
8050        }
8051    }
8052}