use std::{
hash::{Hash, Hasher},
ops::Deref,
sync::Arc,
};
use futures::future::join_all;
use rayon::prelude::*;
use rspack_collections::IdentifierMap;
use rspack_core::{
ChunkByUkey, ChunkUkey, Compilation, ExportsInfoArtifact, Module, ModuleIdentifier,
RuntimeKeyMap, UsageKey, get_runtime_key,
};
use rspack_error::{Result, ToStringResultToRspackResultExt};
use rspack_util::{fx_hash::FxDashMap, tracing_preset::TRACING_BENCH_TARGET};
use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
use tracing::instrument;
use super::ModuleGroupMap;
use crate::{
SplitChunksPlugin,
common::{ChunkFilter, ModuleChunks, ModuleSizes},
min_size::remove_min_size_violating_modules,
module_group::{IndexedCacheGroup, ModuleGroup, ModuleGroupKey, compare_entries},
options::{
cache_group::CacheGroup,
cache_group_test::{CacheGroupTest, CacheGroupTestFnCtx},
chunk_name::{ChunkNameGetter, ChunkNameGetterFnCtx},
},
};
/// Hash-based key identifying a set of chunks; produced by `get_key`.
type ChunksKey = u64;
/// A set of chunks together with the `ChunksKey` computed for it.
///
/// The set is stored behind an `Arc`, so cloning a combination shares the
/// underlying set instead of copying it.
#[derive(Clone)]
struct ChunkCombination {
// Key computed for `chunks`.
key: ChunksKey,
// The chunks that make up this combination (shared between clones).
chunks: Arc<FxHashSet<ChunkUkey>>,
}
/// Exposes the wrapped chunk set so set methods (`len`, `iter`, `is_subset`, ...)
/// can be called directly on a `ChunkCombination`.
impl Deref for ChunkCombination {
  type Target = FxHashSet<ChunkUkey>;

  /// Borrows the chunk set stored inside the `Arc`.
  fn deref(&self) -> &Self::Target {
    self.chunks.as_ref()
  }
}
/// The chunks of a combination that remain after a cache group's chunk filter.
enum SelectedChunks<'a> {
// Every chunk of the combination was kept; borrows the combination as-is.
All(&'a ChunkCombination),
// Only a subset was kept; owns the surviving chunk ukeys.
Filtered(Vec<ChunkUkey>),
}
/// Borrowing iterator over a `SelectedChunks`, one variant per backing collection.
enum SelectedChunksIter<'a> {
// Iterates the hash set behind `SelectedChunks::All`.
All(std::collections::hash_set::Iter<'a, ChunkUkey>),
// Iterates the vector behind `SelectedChunks::Filtered`.
Filtered(std::slice::Iter<'a, ChunkUkey>),
}
impl<'a> Iterator for SelectedChunksIter<'a> {
type Item = &'a ChunkUkey;
fn next(&mut self) -> Option<Self::Item> {
match self {
Self::All(iter) => iter.next(),
Self::Filtered(iter) => iter.next(),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
Self::All(iter) => iter.size_hint(),
Self::Filtered(iter) => iter.size_hint(),
}
}
}
impl SelectedChunks<'_> {
fn len(&self) -> usize {
match self {
Self::All(chunks) => chunks.len(),
Self::Filtered(chunks) => chunks.len(),
}
}
fn iter(&self) -> SelectedChunksIter<'_> {
match self {
Self::All(chunks) => SelectedChunksIter::All(chunks.iter()),
Self::Filtered(chunks) => SelectedChunksIter::Filtered(chunks.iter()),
}
}
fn key(&self) -> Option<ChunksKey> {
match self {
Self::All(chunks) => Some(chunks.key),
Self::Filtered(_) => None,
}
}
}
/// A (module, cache group, selected chunks) match that is merged into the
/// module group map by `merge_matched_item_into_module_group_map`.
struct MatchedItem<'a> {
// The module that matched the cache group.
module: &'a dyn Module,
// Index of the cache group; becomes part of the `ModuleGroupKey`.
cache_group_index: u32,
// The cache group that matched.
cache_group: &'a CacheGroup,
// Chunks of the combination that passed the cache group's chunk filter.
selected_chunks: SelectedChunks<'a>,
}
/// Computes the `ChunksKey` for a set of chunks.
///
/// Each chunk is translated to its index in `chunk_index_map`, the indices are
/// sorted so the result does not depend on iteration order, and every index is
/// then fed into an `FxHasher` one by one.
///
/// # Panics
///
/// Panics if a chunk has no entry in `chunk_index_map`.
fn get_key<I: Iterator<Item = ChunkUkey>>(
  chunks: I,
  chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> ChunksKey {
  let mut chunk_indices: Vec<_> = chunks
    .map(|ukey| {
      chunk_index_map
        .get(&ukey)
        .copied()
        .expect("should already have index for chunk ukey")
    })
    .collect();
  chunk_indices.sort_unstable();

  let mut hasher = FxHasher::default();
  chunk_indices
    .into_iter()
    .for_each(|chunk_index| chunk_index.hash(&mut hasher));
  hasher.finish()
}
/// Precomputed chunk combinations used when matching modules to cache groups.
#[derive(Default)]
pub(crate) struct Combinator {
// Filled by `prepare_group_by_chunks`: for each chunk-set key, the smaller
// chunk sets that are subsets of it, followed by the set itself.
combinations: FxHashMap<ChunksKey, Vec<ChunkCombination>>,
// Filled by `prepare_group_by_used_exports`: same shape as `combinations`,
// built from chunk sets grouped by export usage.
used_exports_combinations: FxHashMap<ChunksKey, Vec<ChunkCombination>>,
// Indexed by module index: keys of the module's chunk sets grouped by export usage.
grouped_by_exports: Vec<Vec<ChunksKey>>,
}
/// The chunk combinations returned by `Combinator::get_combs` for one module.
enum ChunkCombinations<'a> {
// Borrowed slice of combinations (the non-used-exports lookup).
Slice(&'a [ChunkCombination]),
// Collected references to combinations (the used-exports lookup).
UsedExports(Vec<&'a ChunkCombination>),
}
/// Iterator over `ChunkCombinations`; both variants yield `&ChunkCombination`.
enum ChunkCombinationsIter<'a> {
// Iterates a borrowed slice of combinations.
Slice(std::slice::Iter<'a, ChunkCombination>),
// Iterates a slice of references, copying each reference out.
UsedExports(std::iter::Copied<std::slice::Iter<'a, &'a ChunkCombination>>),
}
impl<'a> Iterator for ChunkCombinationsIter<'a> {
type Item = &'a ChunkCombination;
fn next(&mut self) -> Option<Self::Item> {
match self {
Self::Slice(iter) => iter.next(),
Self::UsedExports(iter) => iter.next(),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
Self::Slice(iter) => iter.size_hint(),
Self::UsedExports(iter) => iter.size_hint(),
}
}
}
/// Allows `for combination in &combinations` over either variant.
impl<'a> IntoIterator for &'a ChunkCombinations<'a> {
  type Item = &'a ChunkCombination;
  type IntoIter = ChunkCombinationsIter<'a>;

  /// Builds the iterator variant matching the storage variant.
  fn into_iter(self) -> Self::IntoIter {
    match self {
      ChunkCombinations::Slice(slice) => {
        let slice_iter = slice.iter();
        ChunkCombinationsIter::Slice(slice_iter)
      }
      ChunkCombinations::UsedExports(refs) => {
        let refs_iter = refs.iter().copied();
        ChunkCombinationsIter::UsedExports(refs_iter)
      }
    }
  }
}
impl Combinator {
/// Looks up the precomputed combinations for the chunk set of the module at
/// `module_index`.
///
/// # Panics
///
/// Panics if the module has no entry in `module_chunks`, or if no combinations
/// were prepared for its chunk-set key.
fn get_non_used_exports_combs(
  &self,
  module_index: usize,
  module_chunks: &ModuleChunks,
  chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> &[ChunkCombination] {
  let chunks_of_module = module_chunks
    .get(module_index)
    .expect("should have module chunks");
  let key = get_key(chunks_of_module.iter().copied(), chunk_index_map);
  let prepared = self
    .combinations
    .get(&key)
    .expect("should have combinations");
  prepared.as_slice()
}
/// Collects references to every used-exports combination recorded for the
/// module at `module_index`, in the order of the module's grouped keys.
///
/// # Panics
///
/// Panics if the module has no entry in `grouped_by_exports`, or if one of its
/// keys has no prepared combinations.
fn get_used_exports_combs(&self, module_index: usize) -> Vec<&ChunkCombination> {
  self
    .grouped_by_exports
    .get(module_index)
    .expect("should have exports for module")
    .iter()
    .flat_map(|chunks_key| {
      self
        .used_exports_combinations
        .get(chunks_key)
        .expect("should have combinations")
        .iter()
    })
    .collect()
}
fn group_chunks_by_exports(
module_identifier: &ModuleIdentifier,
module_chunks: impl Iterator<Item = ChunkUkey>,
exports_info_artifact: &ExportsInfoArtifact,
chunk_by_ukey: &ChunkByUkey,
chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> Vec<ChunkCombination> {
let exports_info = exports_info_artifact.get_exports_info_data(module_identifier);
let mut grouped_by_used_exports: FxHashMap<UsageKey, FxHashSet<ChunkUkey>> = Default::default();
let mut runtime_key_map = RuntimeKeyMap::default();
for chunk_ukey in module_chunks {
let chunk = chunk_by_ukey.expect_get(&chunk_ukey);
let runtime = chunk.runtime();
let usage_key = runtime_key_map
.entry(get_runtime_key(runtime).clone())
.or_insert_with(|| exports_info.get_usage_key(Some(runtime)))
.clone();
grouped_by_used_exports
.entry(usage_key)
.or_default()
.insert(chunk_ukey);
}
grouped_by_used_exports
.into_values()
.map(|chunks| ChunkCombination {
key: get_key(chunks.iter().copied(), chunk_index_map),
chunks: Arc::new(chunks),
})
.collect()
}
/// Returns the chunk combinations for the module at `module_index`, taken from
/// the used-exports tables when `used_exports` is set and from the plain
/// chunk-set table otherwise.
fn get_combs(
  &self,
  module_index: usize,
  used_exports: bool,
  module_chunks: &ModuleChunks,
  chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> ChunkCombinations<'_> {
  if !used_exports {
    let combs = self.get_non_used_exports_combs(module_index, module_chunks, chunk_index_map);
    return ChunkCombinations::Slice(combs);
  }
  ChunkCombinations::UsedExports(self.get_used_exports_combs(module_index))
}
/// For every chunk set in `chunk_sets_in_graph`, lists the strictly smaller
/// sets from `chunk_sets_by_count` that are subsets of it, followed by the set
/// itself.
///
/// `chunk_sets_by_count` is scanned only while its entries are smaller than
/// the current set, so callers pass it sorted by ascending size.
fn get_combinations(
  chunk_sets_in_graph: FxHashMap<ChunksKey, ChunkCombination>,
  chunk_sets_by_count: Vec<ChunkCombination>,
) -> FxHashMap<ChunksKey, Vec<ChunkCombination>> {
  chunk_sets_in_graph
    .into_par_iter()
    .map(|(chunks_key, chunks_set)| {
      let own_size = chunks_set.len();
      let mut combinations: Vec<ChunkCombination> = chunk_sets_by_count
        .iter()
        .take_while(|candidate| candidate.len() < own_size)
        .filter(|candidate| candidate.is_subset(&chunks_set))
        .cloned()
        .collect();
      combinations.push(chunks_set);
      (chunks_key, combinations)
    })
    .collect::<FxHashMap<_, _>>()
}
pub(crate) fn prepare_group_by_chunks(
&mut self,
all_modules: &[ModuleIdentifier],
module_chunks: &ModuleChunks,
chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) {
let chunk_sets_in_graph = all_modules
.par_iter()
.enumerate()
.filter_map(|(module_index, _)| {
let chunks = module_chunks
.get(module_index)
.expect("should have module chunks");
if chunks.is_empty() {
return None;
}
let chunk_key = get_key(chunks.iter().copied(), chunk_index_map);
Some((
chunk_key,
ChunkCombination {
key: chunk_key,
chunks: Arc::new(chunks.clone()),
},
))
})
.collect::<FxHashMap<_, _>>();
let mut chunk_sets_by_count = Vec::<ChunkCombination>::with_capacity(chunk_sets_in_graph.len());
for chunks in chunk_sets_in_graph.values() {
chunk_sets_by_count.push(chunks.clone());
}
chunk_sets_by_count.sort_unstable_by_key(|chunks| chunks.len());
self.combinations = Self::get_combinations(chunk_sets_in_graph, chunk_sets_by_count);
}
pub(crate) fn prepare_group_by_used_exports(
&mut self,
all_modules: &[ModuleIdentifier],
exports_info_artifact: &ExportsInfoArtifact,
chunk_by_ukey: &ChunkByUkey,
module_chunks: &ModuleChunks,
chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) {
let (grouped_by_exports, used_exports_chunks): (Vec<_>, Vec<_>) = all_modules
.par_iter()
.enumerate()
.map(|(module_index, module)| {
let grouped_chunks = Self::group_chunks_by_exports(
module,
module_chunks
.get(module_index)
.expect("should have module chunks")
.iter()
.copied(),
exports_info_artifact,
chunk_by_ukey,
chunk_index_map,
);
let mut grouped_chunks_key = Vec::with_capacity(grouped_chunks.len());
let mut used_exports_chunks = Vec::with_capacity(grouped_chunks.len());
for chunks in grouped_chunks {
if chunks.is_empty() {
continue;
}
grouped_chunks_key.push(chunks.key);
used_exports_chunks.push(chunks);
}
(grouped_chunks_key, used_exports_chunks)
})
.unzip();
self.grouped_by_exports = grouped_by_exports;
let mut used_exports_chunk_sets_in_graph = FxHashMap::default();
let mut used_exports_chunk_sets_by_count = Vec::<ChunkCombination>::default();
for used_exports_chunks in used_exports_chunks {
for chunks in used_exports_chunks {
let chunk_key = chunks.key;
if let std::collections::hash_map::Entry::Vacant(entry) =
used_exports_chunk_sets_in_graph.entry(chunk_key)
{
used_exports_chunk_sets_by_count.push(chunks.clone());
entry.insert(chunks);
}
}
}
used_exports_chunk_sets_by_count.sort_unstable_by_key(|chunks| chunks.len());
self.used_exports_combinations = Self::get_combinations(
used_exports_chunk_sets_in_graph,
used_exports_chunk_sets_by_count,
);
}
}
impl SplitChunksPlugin {
/// Removes and returns the entry of `module_group_map` that `compare_entries`
/// ranks highest.
///
/// Entries are scanned in index order; a later entry replaces the current best
/// only when the comparison result is strictly positive. The winner is taken
/// out with `swap_remove_index`.
///
/// # Panics
///
/// Panics if `module_group_map` is empty.
pub(crate) fn find_best_module_group(
  &self,
  module_group_map: &mut ModuleGroupMap,
) -> (ModuleGroupKey, ModuleGroup) {
  debug_assert!(!module_group_map.is_empty());

  let entry_count = module_group_map.len();
  let winner_index = (1..entry_count).fold(0, |best_index, candidate_index| {
    let [(candidate_key, candidate), (best_key, best)] = module_group_map
      .get_disjoint_indices_mut([candidate_index, best_index])
      .expect("entry indices should be valid and unique");
    if compare_entries((candidate_key, candidate), (best_key, best)) > 0f64 {
      candidate_index
    } else {
      best_index
    }
  });

  module_group_map
    .swap_remove_index(winner_index)
    .expect("This should never happen, please file an issue")
}
/// Builds the `ModuleGroupMap` by matching every module against every cache group.
///
/// One task is spawned per module. For each cache group whose `type`, `layer`
/// and `test` checks accept the module, every chunk combination of the module
/// is run through the cache group's chunk filter and, when enough chunks
/// remain, merged into a shared concurrent map. Finally the collected groups
/// are sorted by key and moved into the returned map.
///
/// # Errors
///
/// Returns the first error produced by a spawned task (layer check, test
/// function, chunk filter function, or chunk name getter), or by converting a
/// task's result with `to_rspack_result`.
#[allow(clippy::too_many_arguments)]
#[instrument(name = "Compilation:SplitChunks:prepare_module_group_map",target=TRACING_BENCH_TARGET, skip_all)]
pub(crate) async fn prepare_module_group_map(
&self,
combinator: &Combinator,
all_modules: &[ModuleIdentifier],
cache_groups: Vec<IndexedCacheGroup<'_>>,
removed_module_chunks: &IdentifierMap<FxHashSet<ChunkUkey>>,
compilation: &Compilation,
module_chunks: &ModuleChunks,
chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> Result<ModuleGroupMap> {
let module_graph = compilation.get_module_graph();
// Concurrent map that all spawned tasks merge their matches into.
let module_group_map: FxDashMap<ModuleGroupKey, ModuleGroup> = FxDashMap::default();
let module_group_results = rspack_parallel::scope::<_, Result<_>>(|token| {
all_modules.iter().enumerate().for_each(|(module_index, mid)| {
// NOTE(review): `token.used` is unsafe; presumably every borrowed value must outlive
// the spawned tasks, which looks satisfied because the scope is awaited before this
// function returns — confirm against the `rspack_parallel::scope` safety contract.
let s = unsafe { token.used((&cache_groups, module_index, mid, &module_graph, compilation, &module_group_map, &combinator, module_chunks, removed_module_chunks, chunk_index_map)) };
s.spawn(|(cache_groups, module_index, mid, module_graph, compilation, module_group_map, combinator, module_chunks, removed_module_chunks, chunk_index_map)| async move {
let belong_to_chunks = module_chunks
.get(module_index)
.expect("should have module chunks");
// A module that is in no chunk cannot be split out.
if belong_to_chunks.is_empty() {
return Ok(());
}
// Skip the module when every chunk it belongs to is listed as removed for it.
if let Some(removed_chunks) = removed_module_chunks.get(mid) && belong_to_chunks.iter().all(|c| removed_chunks.contains(c)) {
return Ok(());
}
let module = module_graph.module_by_identifier(mid).expect("should have module").as_ref();
// Combinations are looked up lazily and cached per module, once for each
// `used_exports` setting.
let mut used_exports_combs = None;
let mut non_used_exports_combs = None;
for cache_group in cache_groups.iter() {
// Cache group checks, in order: module type, layer, then `test`.
if !(cache_group.cache_group.r#type)(module) {
continue;
}
if !(cache_group.cache_group.layer)(module.get_layer().map(ToString::to_string)).await? {
continue;
}
let is_match = match &cache_group.cache_group.test {
CacheGroupTest::String(str) => module
.name_for_condition().is_some_and(|name| name.starts_with(str)),
CacheGroupTest::RegExp(regexp) => module
.name_for_condition().is_some_and(|name| regexp.test(&name)),
CacheGroupTest::Fn(f) => {
let ctx = CacheGroupTestFnCtx { compilation, module };
// A test function that returns no value counts as "no match".
f(ctx).await?.unwrap_or_default()
}
CacheGroupTest::Enabled => true,
};
if !is_match {
continue;
}
let IndexedCacheGroup {
cache_group_index,
cache_group,
} = cache_group;
let combs = if cache_group.used_exports {
if used_exports_combs.is_none() {
used_exports_combs = Some(combinator.get_combs(
module_index,
true,
module_chunks,
chunk_index_map,
));
}
used_exports_combs.as_ref().expect("should have used_exports_combs")
} else {
if non_used_exports_combs.is_none() {
non_used_exports_combs = Some(combinator.get_combs(
module_index,
false,
module_chunks,
chunk_index_map,
));
}
non_used_exports_combs.as_ref().expect("should have non_used_exports_combs")
};
for chunk_combination in combs {
if chunk_combination.is_empty() {
continue;
}
// Cheap pre-check: filtering can only shrink the combination.
if chunk_combination.len() < cache_group.min_chunks as usize {
tracing::trace!(
"Module({:?}) is ignored by CacheGroup({:?}). Reason: chunk_combination.len({:?}) < cache_group.min_chunks({:?})",
mid,
cache_group.key,
chunk_combination.len(),
cache_group.min_chunks,
);
continue;
}
// Apply the chunk filter: `All` keeps the combination as-is, a function filter
// is evaluated concurrently for every chunk, anything else uses the
// synchronous internal test.
let selected_chunks = if matches!(&cache_group.chunk_filter, ChunkFilter::All) {
SelectedChunks::All(chunk_combination)
} else if cache_group.chunk_filter.is_func() {
SelectedChunks::Filtered(
join_all(chunk_combination.iter().map(|c| async move {
cache_group.chunk_filter.test_func(c, compilation).await.map(|filtered| (c, filtered))
}))
.await
.into_iter()
.collect::<Result<Vec<_>>>()?
.into_iter()
.filter_map(
|(chunk, filtered)| {
if filtered {
Some(chunk)
} else {
None
}
}
).copied().collect::<Vec<_>>(),
)
} else {
SelectedChunks::Filtered(
chunk_combination.iter().filter(|c| {
cache_group.chunk_filter.test_internal(c, compilation)
}).copied().collect::<Vec<_>>(),
)
};
// Re-check `min_chunks` against what survived the filter.
if selected_chunks.len() < cache_group.min_chunks as usize {
tracing::trace!(
"Module({:?}) is ignored by CacheGroup({:?}). Reason: selected_chunks.len({:?}) < cache_group.min_chunks({:?})",
mid,
cache_group.key,
selected_chunks.len(),
cache_group.min_chunks,
);
continue;
}
// Drop the selection if any selected chunk is listed as removed for this module.
if selected_chunks.iter().any(|c| removed_module_chunks.get(mid).is_some_and(|chunks| chunks.contains(c))) {
continue;
}
merge_matched_item_into_module_group_map(
MatchedItem {
module,
cache_group,
cache_group_index: *cache_group_index,
selected_chunks,
},
module_group_map,
compilation,
chunk_index_map,
).await?;
}
}
Ok(())
});
})
})
.await
.into_iter().map(|r| r.to_rspack_result())
.collect::<Result<Vec<_>>>()?;
// Surface the first error returned by a task body.
for result in module_group_results {
result?;
}
// Sort by key before filling the result map, so its order is determined by
// the keys rather than by the concurrent map's iteration order.
let module_group_count = module_group_map.len();
let mut result = Vec::with_capacity(module_group_count);
result.extend(module_group_map);
result.sort_by(|a, b| a.0.cmp(&b.0));
let mut ordered_result =
ModuleGroupMap::with_capacity_and_hasher(module_group_count, Default::default());
ordered_result.extend(result);
Ok(ordered_result)
}
/// Removes the modules of `current_module_group` from every other module group
/// that shares at least one chunk with `used_chunks`, then deletes the groups
/// that are no longer valid.
///
/// A group that lost modules is deleted when any of the following holds:
/// - it has no modules left;
/// - after dropping chunks that no longer contain any of its modules, it has
///   fewer chunks than its cache group's `min_chunks`;
/// - `remove_min_size_violating_modules` reports a violation;
/// - `check_min_size_reduction` fails.
///
/// Groups that share no chunk with `used_chunks`, or that had no module in
/// common with `current_module_group`, are left untouched.
pub(crate) fn remove_all_modules_from_other_module_groups(
  &self,
  current_module_group: &ModuleGroup,
  module_group_map: &mut ModuleGroupMap,
  used_chunks: &FxHashSet<ChunkUkey>,
  compilation: &Compilation,
  module_sizes: &ModuleSizes,
) {
  let keys_of_invalid_group = module_group_map
    .par_iter_mut()
    .filter_map(|(key, other_module_group)| {
      // Only groups that touch one of the used chunks can be affected.
      other_module_group
        .chunks
        .intersection(used_chunks)
        .next()?;

      let module_count = other_module_group.modules.len();
      // Intersect starting from the smaller set so fewer elements are probed.
      let duplicated_modules =
        if other_module_group.modules.len() > current_module_group.modules.len() {
          current_module_group
            .modules
            .intersection(&other_module_group.modules)
            .copied()
            .collect::<Vec<_>>()
        } else {
          other_module_group
            .modules
            .intersection(&current_module_group.modules)
            .copied()
            .collect::<Vec<_>>()
        };
      for module in duplicated_modules {
        other_module_group.remove_module(module);
      }

      // Nothing was removed, so the group is unchanged and stays valid.
      if module_count == other_module_group.modules.len() {
        return None;
      }

      if other_module_group.modules.is_empty() {
        tracing::trace!(
          "{key} is deleted for having empty modules",
        );
        return Some(key.clone());
      }

      tracing::trace!("other_module_group: {other_module_group:#?}");
      tracing::trace!("item.modules: {:#?}", current_module_group.modules);

      // Keep only the chunks that still contain at least one remaining module.
      other_module_group.chunks.retain(|c| {
        compilation
          .build_chunk_graph_artifact
          .chunk_graph
          .is_any_module_in_chunk(other_module_group.modules.iter(), *c)
      });

      let cache_group = other_module_group.get_cache_group(&self.cache_groups);

      if other_module_group.chunks.len() < cache_group.min_chunks as usize {
        tracing::trace!(
          "{key} is deleted for each_module_group.chunks.len()({:?}) < cache_group.min_chunks({:?})",
          other_module_group.chunks.len(),
          cache_group.min_chunks
        );
        return Some(key.clone());
      }

      if remove_min_size_violating_modules(key, other_module_group, cache_group, module_sizes) {
        tracing::trace!(
          "{key} is deleted for violating min_size {:#?}",
          cache_group.min_size,
        );
        return Some(key.clone());
      }

      let chunks_len = other_module_group.chunks.len();
      if !Self::check_min_size_reduction(
        other_module_group.get_sizes(module_sizes),
        &cache_group.min_size_reduction,
        chunks_len,
      ) {
        // Report the option that was actually checked, not `min_size`.
        tracing::trace!(
          "{key} is deleted for violating min_size_reduction {:#?}",
          cache_group.min_size_reduction,
        );
        return Some(key.clone());
      }

      None
    })
    .collect::<Vec<_>>();

  // Removal happens after the parallel pass, which only collected the keys.
  keys_of_invalid_group.into_iter().for_each(|key| {
    module_group_map.swap_remove(&key);
  });
}
}
/// Adds a matched module and its selected chunks to the module group it
/// belongs to, creating the group on first use.
///
/// The group key is `Named` when the cache group yields a chunk name (a fixed
/// string or the result of the name function) and `Anonymous` otherwise. An
/// anonymous key uses the selection's stored chunks key when it has one and
/// computes one from the selected chunks when it does not.
///
/// # Errors
///
/// Propagates any error returned by the cache group's name function.
async fn merge_matched_item_into_module_group_map(
  matched_item: MatchedItem<'_>,
  module_group_map: &FxDashMap<ModuleGroupKey, ModuleGroup>,
  compilation: &Compilation,
  chunk_index_map: &FxHashMap<ChunkUkey, u32>,
) -> Result<()> {
  let MatchedItem {
    module,
    cache_group_index,
    cache_group,
    selected_chunks,
  } = matched_item;

  // Declared before the match so the buffer outlives the context that borrows it.
  let chunks_for_name_getter;
  let chunk_name = match &cache_group.name {
    ChunkNameGetter::Disabled => None,
    ChunkNameGetter::String(fixed_name) => Some(fixed_name.clone()),
    ChunkNameGetter::Fn(name_getter) => {
      chunks_for_name_getter = selected_chunks.iter().copied().collect::<Vec<_>>();
      name_getter(ChunkNameGetterFnCtx {
        module,
        chunks: &chunks_for_name_getter,
        cache_group_key: &cache_group.key,
        compilation,
      })
      .await?
    }
  };

  let key = match &chunk_name {
    Some(name) => ModuleGroupKey::Named {
      cache_group_index,
      chunk_name: name.clone(),
    },
    None => {
      let chunks_key = match selected_chunks.key() {
        Some(stored_key) => stored_key,
        None => get_key(selected_chunks.iter().copied(), chunk_index_map),
      };
      ModuleGroupKey::Anonymous {
        cache_group_index,
        chunks_key,
      }
    }
  };

  let mut group_entry = module_group_map
    .entry(key)
    .or_insert_with(|| ModuleGroup::new(chunk_name, cache_group_index, cache_group));
  group_entry.add_module(module.identifier());
  group_entry.chunks.extend(selected_chunks.iter().copied());
  Ok(())
}