use std::sync::Arc;
use crate::{
config::ConfigOptions,
error::Result,
physical_plan::{
coalesce_batches::CoalesceBatchesExec, filter::FilterExec, joins::HashJoinExec,
repartition::RepartitionExec, Partitioning,
},
};
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_physical_optimizer::PhysicalOptimizerRule;
/// Optimizer rule that introduces [`CoalesceBatchesExec`] to combine small
/// record batches into larger ones, amortizing per-batch overhead in
/// downstream operators. Controlled by `execution.coalesce_batches` and
/// `execution.batch_size` in [`ConfigOptions`].
#[derive(Default)]
pub struct CoalesceBatches {}
impl CoalesceBatches {
    /// Creates a new `CoalesceBatches` optimizer rule.
    ///
    /// Equivalent to [`CoalesceBatches::default`]; provided for API symmetry
    /// with the other optimizer rules.
    pub fn new() -> Self {
        Self::default()
    }
}
impl PhysicalOptimizerRule for CoalesceBatches {
    /// Walks the plan bottom-up and wraps operators that tend to emit small
    /// batches in a [`CoalesceBatchesExec`] targeting `execution.batch_size`
    /// rows.
    ///
    /// Returns the (possibly rewritten) plan; the input plan is returned
    /// unchanged when `execution.coalesce_batches` is disabled.
    fn optimize(
        &self,
        plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
        config: &ConfigOptions,
    ) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
        // Honor the user's configuration toggle before touching the plan.
        if !config.execution.coalesce_batches {
            return Ok(plan);
        }

        let target_batch_size = config.execution.batch_size;
        plan.transform_up(|plan| {
            let plan_any = plan.as_any();
            // Coalesce after operators that can drastically shrink batch
            // sizes: filters and hash joins may emit nearly-empty batches,
            // and hash repartitioning scatters each input batch across many
            // output partitions. Round-robin repartitioning forwards whole
            // batches, so it is excluded.
            let wrap_in_coalesce = plan_any.downcast_ref::<FilterExec>().is_some()
                || plan_any.downcast_ref::<HashJoinExec>().is_some()
                || plan_any
                    .downcast_ref::<RepartitionExec>()
                    .map(|repart_exec| {
                        // Match on the borrowed `Partitioning` directly; no
                        // need to clone it just to inspect the variant.
                        !matches!(
                            repart_exec.partitioning(),
                            Partitioning::RoundRobinBatch(_)
                        )
                    })
                    .unwrap_or(false);

            if wrap_in_coalesce {
                Ok(Transformed::yes(Arc::new(CoalesceBatchesExec::new(
                    plan,
                    target_batch_size,
                ))))
            } else {
                Ok(Transformed::no(plan))
            }
        })
        .data()
    }

    /// Name of this rule, as shown in explain output and logs.
    fn name(&self) -> &str {
        "coalesce_batches"
    }

    /// Inserting `CoalesceBatchesExec` never alters the schema, so the
    /// post-optimization schema check applies.
    fn schema_check(&self) -> bool {
        true
    }
}