use std::sync::Arc;
use std::time::Duration;
use arrow_array::RecordBatchReader;
use futures::future::Either;
use futures::{FutureExt, TryFutureExt};
use lance::dataset::{
MergeInsertBuilder as LanceMergeInsertBuilder, WhenMatched, WhenNotMatchedBySource,
};
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
use super::{BaseTable, NativeTable};
pub(crate) mod lsm;
/// Summary returned by a merge-insert operation.
///
/// Every field carries `#[serde(default)]`, so a serialized payload that omits a
/// field still deserializes, with that field set to its `Default` value (zero).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct MergeResult {
/// Table version associated with the merge. On the standard (non-LSM) path this
/// is the manifest version of the dataset produced by the merge.
#[serde(default)]
pub version: u64,
/// Number of rows inserted, as reported by the merge statistics.
#[serde(default)]
pub num_inserted_rows: u64,
/// Number of rows updated, as reported by the merge statistics.
#[serde(default)]
pub num_updated_rows: u64,
/// Number of rows deleted, as reported by the merge statistics.
#[serde(default)]
pub num_deleted_rows: u64,
/// Attempt counter copied from the merge statistics.
/// NOTE(review): presumably the number of commit attempts including retries — confirm.
#[serde(default)]
pub num_attempts: u32,
/// Total rows written. On the standard path this is
/// `num_inserted_rows + num_updated_rows`.
#[serde(default)]
pub num_rows: u64,
}
/// Condition attached to a merge-insert clause.
///
/// A condition can be supplied either as SQL text or as an already-built
/// DataFusion expression.
#[derive(Debug, Clone)]
pub enum MergeFilter {
/// Condition given as a SQL string; it is parsed against the dataset when the
/// merge is executed.
Sql(String),
/// Condition given as a DataFusion expression; it is handed to the merge as-is.
Expr(datafusion_expr::Expr),
}
/// Collects the options of a merge-insert (upsert) operation on a table.
///
/// The `when_*` flags select what happens to matched rows, to source rows without
/// a match, and to target rows without a match. Nothing runs until
/// [`MergeInsertBuilder::execute`] is called.
#[derive(Debug, Clone)]
pub struct MergeInsertBuilder {
/// Table the merge will be executed against.
table: Arc<dyn BaseTable>,
/// Key columns of the merge; forwarded to the Lance merge builder on the standard path.
pub(crate) on: Vec<String>,
/// Whether rows that match on the key columns are updated. Defaults to `false`.
pub(crate) when_matched_update_all: bool,
/// Optional condition restricting which matched rows are updated.
/// Only consulted when `when_matched_update_all` is `true`.
pub(crate) when_matched_update_all_filt: Option<MergeFilter>,
/// Whether source rows without a match are inserted. Defaults to `false`.
pub(crate) when_not_matched_insert_all: bool,
/// Whether target rows without a match in the source are deleted. Defaults to `false`.
pub(crate) when_not_matched_by_source_delete: bool,
/// Optional condition restricting which unmatched target rows are deleted.
/// Only consulted when `when_not_matched_by_source_delete` is `true`.
pub(crate) when_not_matched_by_source_delete_filt: Option<MergeFilter>,
/// Optional overall time limit for the operation. Defaults to `None` (no limit).
pub(crate) timeout: Option<Duration>,
/// Forwarded to the Lance merge builder's `use_index` setting. Defaults to `true`.
pub(crate) use_index: bool,
/// Explicit preference for the LSM write path; `None` leaves the choice to the
/// dispatch logic in the `lsm` module.
/// NOTE(review): exact dispatch rules live in `lsm::lsm_dispatch_decision` — confirm there.
pub(crate) use_lsm_write: Option<bool>,
/// Passed to the LSM executor when the LSM path is taken. Defaults to `true`.
/// NOTE(review): presumably makes the LSM path check that the input maps to a
/// single shard — confirm against the `lsm` module.
pub(crate) validate_single_shard: bool,
}
impl MergeInsertBuilder {
    /// Creates a builder for `table` keyed on the `on` columns.
    ///
    /// All merge clauses start disabled, no timeout is set, index usage and
    /// single-shard validation are enabled, and the LSM write preference is unset.
    pub(super) fn new(table: Arc<dyn BaseTable>, on: Vec<String>) -> Self {
        Self {
            table,
            on,
            // Merge clauses: everything is off until explicitly enabled.
            when_matched_update_all: false,
            when_matched_update_all_filt: None,
            when_not_matched_insert_all: false,
            when_not_matched_by_source_delete: false,
            when_not_matched_by_source_delete_filt: None,
            // Execution options.
            timeout: None,
            use_index: true,
            use_lsm_write: None,
            validate_single_shard: true,
        }
    }

    /// Enables updating of rows that match on the key columns.
    ///
    /// `condition` is an optional SQL filter limiting which matched rows are updated.
    /// Passing `None` clears any condition set by an earlier call.
    pub fn when_matched_update_all(&mut self, condition: Option<String>) -> &mut Self {
        let filter = condition.map(MergeFilter::Sql);
        self.when_matched_update_all_filt = filter;
        self.when_matched_update_all = true;
        self
    }

    /// Enables updating of matched rows, restricted by a DataFusion expression.
    ///
    /// Replaces any condition set by an earlier call.
    pub fn when_matched_update_all_expr(&mut self, condition: datafusion_expr::Expr) -> &mut Self {
        let filter = MergeFilter::Expr(condition);
        self.when_matched_update_all_filt = Some(filter);
        self.when_matched_update_all = true;
        self
    }

    /// Enables insertion of source rows that have no match in the table.
    pub fn when_not_matched_insert_all(&mut self) -> &mut Self {
        self.when_not_matched_insert_all = true;
        self
    }

    /// Enables deletion of table rows that have no match in the source data.
    ///
    /// `filter` is an optional SQL condition limiting which unmatched rows are deleted.
    /// Passing `None` clears any filter set by an earlier call.
    pub fn when_not_matched_by_source_delete(&mut self, filter: Option<String>) -> &mut Self {
        let delete_filter = filter.map(MergeFilter::Sql);
        self.when_not_matched_by_source_delete_filt = delete_filter;
        self.when_not_matched_by_source_delete = true;
        self
    }

    /// Enables deletion of unmatched table rows, restricted by a DataFusion expression.
    ///
    /// Replaces any filter set by an earlier call.
    pub fn when_not_matched_by_source_delete_expr(
        &mut self,
        filter: datafusion_expr::Expr,
    ) -> &mut Self {
        let delete_filter = MergeFilter::Expr(filter);
        self.when_not_matched_by_source_delete_filt = Some(delete_filter);
        self.when_not_matched_by_source_delete = true;
        self
    }

    /// Sets an upper bound on how long the merge may run.
    pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
        self.timeout = Some(timeout);
        self
    }

    /// Sets the `use_index` option that is forwarded to the Lance merge builder.
    pub fn use_index(&mut self, use_index: bool) -> &mut Self {
        self.use_index = use_index;
        self
    }

    /// Records an explicit preference for (`true`) or against (`false`) the LSM write path.
    ///
    /// When this is never called, the preference stays unset.
    pub fn use_lsm_write(&mut self, use_lsm_write: bool) -> &mut Self {
        self.use_lsm_write = Some(use_lsm_write);
        self
    }

    /// Sets the single-shard validation flag that is passed to the LSM executor.
    pub fn validate_single_shard(&mut self, validate_single_shard: bool) -> &mut Self {
        self.validate_single_shard = validate_single_shard;
        self
    }

    /// Consumes the builder and runs the merge with `new_data` as the source rows.
    ///
    /// The work is delegated to the underlying table's `merge_insert` implementation.
    pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<MergeResult> {
        // Take our own handle to the table first so the builder itself can be moved
        // into the call as the parameter set.
        let table = self.table.clone();
        table.merge_insert(self, new_data).await
    }
}
pub(crate) async fn execute_merge_insert(
table: &NativeTable,
params: MergeInsertBuilder,
new_data: Box<dyn RecordBatchReader + Send>,
) -> Result<MergeResult> {
match lsm::lsm_dispatch_decision(table, ¶ms).await? {
lsm::LsmDispatch::Lsm(plan) => {
let future =
lsm::execute_lsm_merge_insert(table, plan, params.validate_single_shard, new_data);
return match params.timeout {
Some(timeout) => match tokio::time::timeout(timeout, future).await {
Ok(result) => result,
Err(_) => Err(Error::Runtime {
message: "merge insert timed out".to_string(),
}),
},
None => future.await,
};
}
lsm::LsmDispatch::Standard => {}
}
let dataset = table.dataset.get().await?;
let mut builder = LanceMergeInsertBuilder::try_new(dataset.clone(), params.on)?;
match (
params.when_matched_update_all,
params.when_matched_update_all_filt,
) {
(false, _) => builder.when_matched(WhenMatched::DoNothing),
(true, None) => builder.when_matched(WhenMatched::UpdateAll),
(true, Some(MergeFilter::Sql(filt))) => {
builder.when_matched(WhenMatched::update_if(&dataset, &filt)?)
}
(true, Some(MergeFilter::Expr(expr))) => {
builder.when_matched(WhenMatched::update_if_expr(expr))
}
};
if params.when_not_matched_insert_all {
builder.when_not_matched(lance::dataset::WhenNotMatched::InsertAll);
} else {
builder.when_not_matched(lance::dataset::WhenNotMatched::DoNothing);
}
if params.when_not_matched_by_source_delete {
let behavior = match params.when_not_matched_by_source_delete_filt {
Some(MergeFilter::Sql(filter)) => {
WhenNotMatchedBySource::delete_if(dataset.as_ref(), &filter)?
}
Some(MergeFilter::Expr(expr)) => WhenNotMatchedBySource::DeleteIf(expr),
None => WhenNotMatchedBySource::Delete,
};
builder.when_not_matched_by_source(behavior);
} else {
builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
}
builder.use_index(params.use_index);
let future = if let Some(timeout) = params.timeout {
let future = builder
.retry_timeout(timeout)
.try_build()?
.execute_reader(new_data);
Either::Left(tokio::time::timeout(timeout, future).map(|res| match res {
Ok(Ok((new_dataset, stats))) => Ok((new_dataset, stats)),
Ok(Err(e)) => Err(e.into()),
Err(_) => Err(Error::Runtime {
message: "merge insert timed out".to_string(),
}),
}))
} else {
let job = builder.try_build()?;
Either::Right(job.execute_reader(new_data).map_err(|e| e.into()))
};
let (new_dataset, stats) = future.await?;
let version = new_dataset.manifest().version;
table.dataset.update(new_dataset.as_ref().clone());
Ok(MergeResult {
version,
num_updated_rows: stats.num_updated_rows,
num_inserted_rows: stats.num_inserted_rows,
num_deleted_rows: stats.num_deleted_rows,
num_attempts: stats.num_attempts,
num_rows: stats.num_inserted_rows + stats.num_updated_rows,
})
}
#[cfg(test)]
mod tests {
    use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, RecordBatchReader};
    use arrow_schema::{DataType, Field, Schema};
    use std::sync::Arc;

    use crate::connect;

    /// Builds a single-batch reader with ten rows: `i` covers `offset..offset + 10`
    /// and every row's `age` is the given `age`.
    fn merge_insert_test_batches(offset: i32, age: i32) -> Box<dyn RecordBatchReader + Send> {
        let fields = vec![
            Field::new("i", DataType::Int32, false),
            Field::new("age", DataType::Int32, false),
        ];
        let schema = Arc::new(Schema::new(fields));
        let keys = Int32Array::from_iter_values(offset..(offset + 10));
        let ages = Int32Array::from_iter_values(std::iter::repeat_n(age, 10));
        let batch =
            RecordBatch::try_new(schema.clone(), vec![Arc::new(keys), Arc::new(ages)]).unwrap();
        Box::new(RecordBatchIterator::new(vec![Ok(batch)], schema))
    }

    /// Exercises insert-only, update-only and conditional-update merges in sequence.
    #[tokio::test]
    async fn test_merge_insert() {
        let conn = connect("memory://").execute().await.unwrap();
        let table = conn
            .create_table("my_table", merge_insert_test_batches(0, 0))
            .execute()
            .await
            .unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 10);

        // Insert-only merge: keys 5..15 overlap the table on 5..10, so five rows are new.
        let source = merge_insert_test_batches(5, 1);
        let mut insert_only = table.merge_insert(&["i"]);
        insert_only.when_not_matched_insert_all();
        let result = insert_only.execute(source).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 15);
        assert_eq!(result.num_inserted_rows, 5);
        assert_eq!(result.num_updated_rows, 0);
        assert_eq!(result.num_deleted_rows, 0);
        assert_eq!(result.num_attempts, 1);

        // Update-only merge with keys 15..25: nothing matches, so the table is unchanged.
        let source = merge_insert_test_batches(15, 2);
        let mut update_only = table.merge_insert(&["i"]);
        update_only.when_matched_update_all(None);
        update_only.execute(source).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 15);
        let rows_with_age_2 = table.count_rows(Some("age = 2".to_string())).await.unwrap();
        assert_eq!(rows_with_age_2, 0);

        // Conditional update: keys 5..15 all match, but only the rows whose current
        // age is 0 (keys 5..10) are rewritten.
        let source = merge_insert_test_batches(5, 3);
        let mut conditional_update = table.merge_insert(&["i"]);
        conditional_update.when_matched_update_all(Some("target.age = 0".to_string()));
        conditional_update.execute(source).await.unwrap();
        let rows_with_age_3 = table.count_rows(Some("age = 3".to_string())).await.unwrap();
        assert_eq!(rows_with_age_3, 5);
    }

    /// Runs insert-only merges with `use_index` set to `true` and then to `false`.
    #[tokio::test]
    async fn test_merge_insert_use_index() {
        let conn = connect("memory://").execute().await.unwrap();
        let table = conn
            .create_table("my_table", merge_insert_test_batches(0, 0))
            .execute()
            .await
            .unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 10);

        // With use_index(true): five of the ten source rows are new.
        let source = merge_insert_test_batches(5, 1);
        let mut with_index = table.merge_insert(&["i"]);
        with_index.when_not_matched_insert_all();
        with_index.use_index(true);
        with_index.execute(source).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 15);

        // With use_index(false): all ten source rows are new.
        let source = merge_insert_test_batches(15, 2);
        let mut without_index = table.merge_insert(&["i"]);
        without_index.when_not_matched_insert_all();
        without_index.use_index(false);
        without_index.execute(source).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 25);
    }

    /// Exercises the expression-based variants of the update and delete clauses.
    #[tokio::test]
    async fn test_merge_insert_expr() {
        use datafusion_expr::{col, lit};

        let conn = connect("memory://").execute().await.unwrap();
        let table = conn
            .create_table("my_table_expr", merge_insert_test_batches(0, 0))
            .execute()
            .await
            .unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 10);

        // Conditional update via an expression: keys 5..10 match and have age 0.
        let source = merge_insert_test_batches(5, 3);
        let mut conditional_update = table.merge_insert(&["i"]);
        let update_condition = col("target.age").eq(lit(0));
        conditional_update.when_matched_update_all_expr(update_condition);
        conditional_update.execute(source).await.unwrap();
        let rows_with_age_3 = table.count_rows(Some("age = 3".to_string())).await.unwrap();
        assert_eq!(rows_with_age_3, 5);

        // Conditional delete via an expression: source keys 10..20 match nothing, so
        // every table row is unmatched and those with age 3 are removed.
        let source = merge_insert_test_batches(10, 0);
        let mut conditional_delete = table.merge_insert(&["i"]);
        let delete_condition = col("target.age").eq(lit(3));
        conditional_delete.when_not_matched_by_source_delete_expr(delete_condition);
        let result = conditional_delete.execute(source).await.unwrap();
        assert_eq!(result.num_deleted_rows, 5);
        assert_eq!(table.count_rows(None).await.unwrap(), 5);
    }
}
#[cfg(test)]
mod lsm_tests {
    use std::sync::Arc;

    use arrow_array::{
        Int64Array, RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray,
    };
    use arrow_schema::{DataType, Field, Schema};
    use tempfile::{TempDir, tempdir};

    use crate::connect;
    use crate::error::Error;
    use crate::table::{LsmWriteSpec, Table};

    /// Builds a single-batch reader with an `id` column holding `ids` and a `value`
    /// column holding `0..ids.len()`.
    fn id_value_reader(ids: Vec<i64>) -> Box<dyn RecordBatchReader + Send> {
        let fields = vec![
            Field::new("id", DataType::Int64, false),
            Field::new("value", DataType::Int64, false),
        ];
        let schema = Arc::new(Schema::new(fields));
        let n = ids.len() as i64;
        let id_column = Int64Array::from(ids);
        let value_column = Int64Array::from_iter_values(0..n);
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(id_column), Arc::new(value_column)],
        )
        .unwrap();
        Box::new(RecordBatchIterator::new(vec![Ok(batch)], schema))
    }

    /// Schema shared by the `(id, region)` readers.
    fn id_region_schema() -> Arc<Schema> {
        Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int64, false),
            Field::new("region", DataType::Utf8, false),
        ]))
    }

    /// Turns `(id, region)` pairs into one record batch with the given schema.
    fn id_region_batch(schema: &Arc<Schema>, rows: &[(i64, &str)]) -> RecordBatch {
        let (ids, regions): (Vec<i64>, Vec<&str>) = rows.iter().copied().unzip();
        RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int64Array::from(ids)),
                Arc::new(StringArray::from(regions)),
            ],
        )
        .unwrap()
    }

    /// Builds a single-batch reader from `(id, region)` pairs.
    fn id_region_reader(rows: Vec<(i64, &str)>) -> Box<dyn RecordBatchReader + Send> {
        let schema = id_region_schema();
        let batch = id_region_batch(&schema, &rows);
        Box::new(RecordBatchIterator::new(vec![Ok(batch)], schema))
    }

    /// Builds a reader that yields one batch per inner vector of `(id, region)` pairs.
    fn id_region_multi_reader(batches: Vec<Vec<(i64, &str)>>) -> Box<dyn RecordBatchReader + Send> {
        let schema = id_region_schema();
        let records: Vec<_> = batches
            .iter()
            .map(|rows| Ok(id_region_batch(&schema, rows)))
            .collect();
        Box::new(RecordBatchIterator::new(records, schema))
    }

    /// Creates table `t` under `dir` with ids 1, 2, 3 and marks `id` as the
    /// unenforced primary key.
    async fn id_value_table(dir: &TempDir) -> Table {
        let uri = dir.path().to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();
        let table = conn
            .create_table("t", id_value_reader(vec![1, 2, 3]))
            .execute()
            .await
            .unwrap();
        table.set_unenforced_primary_key(["id"]).await.unwrap();
        table
    }

    /// Upsert through a bucket spec: asserts that only `num_rows` is reported while
    /// the version and the insert/update counters stay at zero.
    #[tokio::test]
    async fn lsm_merge_insert_bucket() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::bucket("id", 1);
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut upsert = table.merge_insert(&[]);
        upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_value_reader(vec![3, 4, 5]);
        let result = upsert.execute(incoming).await.unwrap();

        assert_eq!(result.num_rows, 3);
        assert_eq!(result.version, 0);
        assert_eq!(result.num_inserted_rows, 0);
        assert_eq!(result.num_updated_rows, 0);
    }

    /// Upsert through an unsharded spec with an explicit `on` column.
    #[tokio::test]
    async fn lsm_merge_insert_unsharded() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::unsharded();
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut upsert = table.merge_insert(&["id"]);
        upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_value_reader(vec![10, 11, 12, 13]);
        let result = upsert.execute(incoming).await.unwrap();

        assert_eq!(result.num_rows, 4);
    }

    /// Upsert through an identity spec on `region`, with all rows in the same region.
    #[tokio::test]
    async fn lsm_merge_insert_identity() {
        let dir = tempdir().unwrap();
        let uri = dir.path().to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();
        let table = conn
            .create_table("t", id_region_reader(vec![(1, "us"), (2, "us")]))
            .execute()
            .await
            .unwrap();
        table.set_unenforced_primary_key(["id"]).await.unwrap();
        let spec = LsmWriteSpec::identity("region");
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut upsert = table.merge_insert(&[]);
        upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_region_reader(vec![(3, "us"), (4, "us")]);
        let result = upsert.execute(incoming).await.unwrap();

        assert_eq!(result.num_rows, 2);
    }

    /// With `use_lsm_write(false)` the merge reports insert statistics and the new
    /// rows are immediately counted in the table, even though a spec is configured.
    #[tokio::test]
    async fn lsm_merge_insert_use_lsm_write_false_falls_back() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::bucket("id", 1);
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut insert_only = table.merge_insert(&["id"]);
        insert_only.when_not_matched_insert_all().use_lsm_write(false);
        let incoming = id_value_reader(vec![3, 4, 5]);
        let result = insert_only.execute(incoming).await.unwrap();

        // id 3 already exists, so only 4 and 5 are inserted.
        assert_eq!(result.num_inserted_rows, 2);
        assert_eq!(table.count_rows(None).await.unwrap(), 5);
    }

    /// Merging on a column other than the primary key is rejected as invalid input.
    #[tokio::test]
    async fn lsm_merge_insert_rejects_on_not_primary_key() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::bucket("id", 1);
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut upsert = table.merge_insert(&["value"]);
        upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_value_reader(vec![1]);
        let err = upsert.execute(incoming).await.unwrap_err();

        assert!(matches!(err, Error::InvalidInput { .. }), "got {err:?}");
    }

    /// An insert-only merge (no update clause) is rejected as invalid input when a
    /// spec is configured.
    #[tokio::test]
    async fn lsm_merge_insert_rejects_non_upsert() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::bucket("id", 1);
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut insert_only = table.merge_insert(&[]);
        insert_only.when_not_matched_insert_all();
        let incoming = id_value_reader(vec![4]);
        let err = insert_only.execute(incoming).await.unwrap_err();

        assert!(matches!(err, Error::InvalidInput { .. }), "got {err:?}");
    }

    /// After `close_lsm_writers`, a further upsert still succeeds.
    #[tokio::test]
    async fn lsm_close_writers_then_reopen() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;
        let spec = LsmWriteSpec::bucket("id", 1);
        table.set_lsm_write_spec(spec).await.unwrap();

        let mut first_upsert = table.merge_insert(&[]);
        first_upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let first_incoming = id_value_reader(vec![7, 8]);
        first_upsert.execute(first_incoming).await.unwrap();

        table.close_lsm_writers().await.unwrap();

        let mut second_upsert = table.merge_insert(&[]);
        second_upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let second_incoming = id_value_reader(vec![9]);
        let result = second_upsert.execute(second_incoming).await.unwrap();

        assert_eq!(result.num_rows, 1);
    }

    /// Multi-batch input: batches sharing one region are accepted, while input whose
    /// batches carry different regions is rejected as invalid input.
    #[tokio::test]
    async fn lsm_merge_insert_multi_batch() {
        let dir = tempdir().unwrap();
        let uri = dir.path().to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();
        let table = conn
            .create_table("t", id_region_reader(vec![(1, "us")]))
            .execute()
            .await
            .unwrap();
        table.set_unenforced_primary_key(["id"]).await.unwrap();
        let spec = LsmWriteSpec::identity("region");
        table.set_lsm_write_spec(spec).await.unwrap();

        // Two batches, same region: all three rows are accepted.
        let mut same_region = table.merge_insert(&[]);
        same_region
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_region_multi_reader(vec![vec![(2, "us"), (3, "us")], vec![(4, "us")]]);
        let result = same_region.execute(incoming).await.unwrap();
        assert_eq!(result.num_rows, 3);

        // Two batches, different regions: the merge fails.
        let mut mixed_regions = table.merge_insert(&[]);
        mixed_regions
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let incoming = id_region_multi_reader(vec![vec![(5, "us")], vec![(6, "eu")]]);
        let err = mixed_regions.execute(incoming).await.unwrap_err();
        assert!(matches!(err, Error::InvalidInput { .. }), "got {err:?}");
    }

    /// Requesting the LSM path on a table without a write spec is invalid input.
    #[tokio::test]
    async fn lsm_merge_insert_use_lsm_write_true_requires_spec() {
        let dir = tempdir().unwrap();
        let table = id_value_table(&dir).await;

        let mut upsert = table.merge_insert(&["id"]);
        upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all()
            .use_lsm_write(true);
        let incoming = id_value_reader(vec![4]);
        let err = upsert.execute(incoming).await.unwrap_err();

        assert!(matches!(err, Error::InvalidInput { .. }), "got {err:?}");
    }

    /// After a write to region `us`, a write to `eu` is rejected until the LSM
    /// writers are closed; afterwards the `eu` write succeeds.
    #[tokio::test]
    async fn lsm_merge_insert_rejects_second_shard() {
        let dir = tempdir().unwrap();
        let uri = dir.path().to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();
        let table = conn
            .create_table("t", id_region_reader(vec![(1, "us")]))
            .execute()
            .await
            .unwrap();
        table.set_unenforced_primary_key(["id"]).await.unwrap();
        let spec = LsmWriteSpec::identity("region");
        table.set_lsm_write_spec(spec).await.unwrap();

        // First write goes to the "us" region.
        let mut us_upsert = table.merge_insert(&[]);
        us_upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let us_rows = id_region_reader(vec![(2, "us")]);
        us_upsert.execute(us_rows).await.unwrap();

        // A write to a different region is refused while the writers are open.
        let mut eu_upsert = table.merge_insert(&[]);
        eu_upsert
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let eu_rows = id_region_reader(vec![(3, "eu")]);
        let err = eu_upsert.execute(eu_rows).await.unwrap_err();
        assert!(matches!(err, Error::InvalidInput { .. }), "got {err:?}");

        table.close_lsm_writers().await.unwrap();

        // Once the writers are closed, the other region is accepted.
        let mut eu_retry = table.merge_insert(&[]);
        eu_retry
            .when_matched_update_all(None)
            .when_not_matched_insert_all();
        let eu_rows = id_region_reader(vec![(4, "eu")]);
        eu_retry.execute(eu_rows).await.unwrap();
    }
}