use std::{
borrow::Cow,
cmp::Ordering,
hash::{Hash, Hasher},
sync::Arc,
};
use futures::future::BoxFuture;
use itertools::{
EitherOrBoth::{Both, Left, Right},
Itertools,
};
use rspack_collections::Identifier;
use rspack_core::{
BoxModule, Chunk, ChunkByUkey, ChunkGraph, ChunkGroupByUkey, ChunkGroupUkey,
ChunkNamedIdArtifact, ChunkUkey, Compilation, CompilerId, ExportsInfoArtifact, Module,
ModuleGraph, ModuleGraphCacheArtifact, ModuleIdentifier, ModuleIdsArtifact,
SideEffectsStateArtifact, compare_runtime,
};
use rspack_error::Result;
use rspack_util::{
comparators::{compare_ids, compare_numbers},
identifier::make_paths_relative,
number_hash::get_number_hash_combined,
};
use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
pub type ModuleFilterFn =
Arc<dyn for<'a> Fn(CompilerId, &'a dyn Module) -> BoxFuture<'a, Result<bool>> + Send + Sync>;
#[allow(clippy::type_complexity)]
#[allow(clippy::collapsible_else_if)]
pub fn get_used_module_ids_and_modules(
compilation: &Compilation,
filter: Option<Box<dyn Fn(&BoxModule) -> bool>>,
) -> (FxHashSet<String>, Vec<ModuleIdentifier>) {
get_used_module_ids_and_modules_with_artifact(
compilation,
&compilation.module_ids_artifact,
filter,
)
}
#[allow(clippy::type_complexity)]
#[allow(clippy::collapsible_else_if)]
pub fn get_used_module_ids_and_modules_with_artifact(
compilation: &Compilation,
module_ids_artifact: &ModuleIdsArtifact,
filter: Option<Box<dyn Fn(&BoxModule) -> bool>>,
) -> (FxHashSet<String>, Vec<ModuleIdentifier>) {
let chunk_graph = &compilation.build_chunk_graph_artifact.chunk_graph;
let mut modules = vec![];
let mut used_ids = FxHashSet::default();
compilation
.get_module_graph()
.modules()
.map(|(_, module)| module)
.filter(|m| m.need_id())
.for_each(|module| {
let module_id = ChunkGraph::get_module_id(module_ids_artifact, module.identifier());
if let Some(module_id) = module_id {
used_ids.insert(module_id.to_string());
} else {
if filter.as_ref().is_none_or(|f| (f)(module))
&& chunk_graph.get_number_of_module_chunks(module.identifier()) != 0
{
modules.push(module.identifier());
}
}
});
(used_ids, modules)
}
pub async fn get_used_module_ids_and_modules_with_async_filter(
compilation: &Compilation,
module_ids_artifact: &ModuleIdsArtifact,
filter: Option<&ModuleFilterFn>,
) -> Result<(FxHashSet<String>, Vec<ModuleIdentifier>)> {
let chunk_graph = &compilation.build_chunk_graph_artifact.chunk_graph;
let mut modules = vec![];
let mut used_ids = FxHashSet::default();
for module in compilation
.get_module_graph()
.modules()
.map(|(_, module)| module)
.filter(|m| m.need_id())
{
let module_id = ChunkGraph::get_module_id(module_ids_artifact, module.identifier());
if let Some(module_id) = module_id {
used_ids.insert(module_id.to_string());
} else if chunk_graph.get_number_of_module_chunks(module.identifier()) != 0
&& match filter {
Some(filter) => filter(compilation.compiler_id(), module.as_ref()).await?,
None => true,
}
{
modules.push(module.identifier());
}
}
Ok((used_ids, modules))
}
pub fn get_short_module_name(module: &BoxModule, context: &str) -> String {
let lib_ident = module.lib_ident(rspack_core::LibIdentOptions { context });
if let Some(lib_ident) = lib_ident {
return avoid_number(&lib_ident).to_string();
};
let name_for_condition = module.name_for_condition();
if let Some(name_for_condition) = name_for_condition {
return avoid_number(&make_paths_relative(context, &name_for_condition)).to_string();
};
String::new()
}
fn avoid_number(s: &str) -> Cow<'_, str> {
if s.len() > 21 {
return Cow::Borrowed(s);
}
let first_char = s.chars().next();
if let Some(first_char) = first_char {
if first_char < '1' {
if first_char == '-' {
return Cow::Borrowed(s);
};
} else if first_char > '9' {
return Cow::Borrowed(s);
}
}
if s.chars().all(|c| c.is_ascii_digit()) {
return Cow::Owned(format!("_{s}"));
}
Cow::Borrowed(s)
}
pub fn get_long_module_name(short_name: &str, module: &BoxModule, context: &str) -> String {
let full_name = get_full_module_name(module, context);
format!("{}?{}", short_name, get_hash(full_name, 4))
}
pub fn get_full_module_name(module: &BoxModule, context: &str) -> String {
make_paths_relative(context, &module.identifier())
}
pub fn get_hash(s: impl Hash, length: usize) -> String {
let mut hasher = FxHasher::default();
s.hash(&mut hasher);
let hash = hasher.finish();
let mut hash_str = format!("{hash:x}");
if hash_str.len() > length {
hash_str.truncate(length);
}
hash_str
}
#[allow(clippy::too_many_arguments)]
pub fn assign_deterministic_ids<'a, T>(
mut items: Vec<T>,
get_name: impl Fn(&T) -> Cow<'a, str>,
comparator: impl FnMut(&T, &T) -> Ordering,
mut assign_id: impl FnMut(&T, usize) -> bool,
ranges: &[usize],
expand_factor: usize,
extra_space: usize,
salt: usize,
) {
items.sort_unstable_by(comparator);
let optimal_range = usize::min(items.len() * 20 + extra_space, usize::MAX);
let mut i = 0;
debug_assert!(!ranges.is_empty());
let mut range = ranges[i];
while range < optimal_range {
i += 1;
if i < ranges.len() {
range = usize::min(ranges[i], usize::MAX);
} else if expand_factor != 0 {
range = usize::min(range * expand_factor, usize::MAX);
} else {
break;
}
}
for item in items {
let ident = get_name(&item);
let mut i = salt;
let mut id = get_number_hash_combined(ident.as_ref(), i, range);
while !assign_id(&item, id) {
i += 1;
id = get_number_hash_combined(ident.as_ref(), i, range);
}
}
}
pub fn assign_ascending_module_ids(
used_ids: &FxHashSet<String>,
modules: Vec<&BoxModule>,
module_ids: &mut ModuleIdsArtifact,
) {
let mut next_id = 0;
let mut assign_id = |module: &BoxModule| {
if ChunkGraph::get_module_id(module_ids, module.identifier()).is_none() {
while used_ids.contains(&next_id.to_string()) {
next_id += 1;
}
ChunkGraph::set_module_id(module_ids, module.identifier(), next_id.to_string().into());
next_id += 1;
}
};
for module in modules {
assign_id(module);
}
}
pub fn compare_modules_by_pre_order_index_or_identifier(
module_graph: &ModuleGraph,
a: &Identifier,
b: &Identifier,
) -> std::cmp::Ordering {
if let Some(a) = module_graph.get_pre_order_index(a)
&& let Some(b) = module_graph.get_pre_order_index(b)
{
compare_numbers(a, b)
} else {
compare_ids(a, b)
}
}
#[cfg(test)]
mod tests {
use std::cmp::Ordering;
use rspack_core::{
Chunk, ChunkGraph, ChunkGroup, ChunkGroupByUkey, ChunkKind, ModuleIdentifier, ModuleIdsArtifact,
};
use rustc_hash::FxHashMap;
use super::{NaturalChunkCompareCache, assign_deterministic_ids, compare_chunks_natural};
#[test]
fn assign_deterministic_ids_accepts_borrowed_names() {
let items = vec![1usize, 2usize, 3usize];
let names = FxHashMap::from_iter([
(1usize, "module-a".to_string()),
(2usize, "module-b".to_string()),
(3usize, "module-c".to_string()),
]);
let mut assigned = FxHashMap::default();
assign_deterministic_ids(
items,
|item| std::borrow::Cow::Borrowed(names.get(item).expect("should have name").as_str()),
|a, b| a.cmp(b),
|item, id| {
assigned.insert(*item, id);
true
},
&[1000],
10,
0,
0,
);
assert_eq!(assigned.len(), 3);
}
#[test]
fn compare_chunks_natural_orders_by_modules_then_groups() {
let mut chunk_graph = ChunkGraph::default();
let mut module_ids = ModuleIdsArtifact::default();
let mut chunk_group_by_ukey = ChunkGroupByUkey::default();
let mut cache = NaturalChunkCompareCache::default();
let chunk_a = Chunk::new(None, ChunkKind::Normal);
let chunk_b = Chunk::new(None, ChunkKind::Normal);
let module_a: ModuleIdentifier = "module-a".into();
let module_b: ModuleIdentifier = "module-b".into();
chunk_graph.connect_chunk_and_module(chunk_a.ukey(), module_a);
chunk_graph.connect_chunk_and_module(chunk_b.ukey(), module_b);
ChunkGraph::set_module_id(&mut module_ids, module_a, "a".into());
ChunkGraph::set_module_id(&mut module_ids, module_b, "b".into());
assert_eq!(
compare_chunks_natural(
&chunk_graph,
&chunk_group_by_ukey,
&module_ids,
&chunk_a,
&chunk_b,
&mut cache,
),
Ordering::Less
);
let mut chunk_c = Chunk::new(None, ChunkKind::Normal);
let mut chunk_d = Chunk::new(None, ChunkKind::Normal);
let shared_module: ModuleIdentifier = "shared-module".into();
ChunkGraph::set_module_id(&mut module_ids, shared_module, "shared".into());
chunk_graph.connect_chunk_and_module(chunk_c.ukey(), shared_module);
chunk_graph.connect_chunk_and_module(chunk_d.ukey(), shared_module);
let mut group = ChunkGroup::default();
group.index = Some(1);
group.chunks = vec![chunk_c.ukey(), chunk_d.ukey()];
chunk_c.add_group(group.ukey());
chunk_d.add_group(group.ukey());
chunk_group_by_ukey.add(group);
let mut cache = NaturalChunkCompareCache::default();
assert_eq!(
compare_chunks_natural(
&chunk_graph,
&chunk_group_by_ukey,
&module_ids,
&chunk_c,
&chunk_d,
&mut cache,
),
Ordering::Less
);
}
}
#[allow(clippy::too_many_arguments)]
pub fn get_short_chunk_name(
chunk: &Chunk,
chunk_graph: &ChunkGraph,
context: &str,
delimiter: &str,
module_graph: &ModuleGraph,
module_graph_cache: &ModuleGraphCacheArtifact,
side_effects_state_artifact: &SideEffectsStateArtifact,
named_chunk_ids_artifact: &ChunkNamedIdArtifact,
exports_info_artifact: &ExportsInfoArtifact,
) -> String {
if let Some(name) = named_chunk_ids_artifact
.chunk_short_names
.get(&chunk.ukey())
{
return name.clone();
}
let modules = chunk_graph
.get_chunk_root_modules(
&chunk.ukey(),
module_graph,
module_graph_cache,
side_effects_state_artifact,
exports_info_artifact,
)
.iter()
.map(|id| {
module_graph
.module_by_identifier(id)
.unwrap_or_else(|| panic!("Module not found {id}"))
})
.collect::<Vec<_>>();
let short_module_names = modules
.iter()
.map(|module| {
let name = get_short_module_name(module, context);
request_to_id(&name)
})
.collect::<Vec<_>>();
let mut id_name_hints = Vec::from_iter(chunk.id_name_hints().clone());
id_name_hints.sort_unstable();
id_name_hints.extend(short_module_names);
let chunk_name = id_name_hints
.iter()
.filter(|id| !id.is_empty())
.join(delimiter);
shorten_long_string(chunk_name, delimiter)
}
pub fn shorten_long_string(string: String, delimiter: &str) -> String {
if string.len() < 100 {
string
} else {
format!(
"{}{}{}",
&string[..(100 - 6 - delimiter.len())],
delimiter,
get_hash(&string, 6)
)
}
}
#[allow(clippy::too_many_arguments)]
pub fn get_long_chunk_name(
chunk: &Chunk,
chunk_graph: &ChunkGraph,
context: &str,
delimiter: &str,
module_graph: &ModuleGraph,
module_graph_cache: &ModuleGraphCacheArtifact,
side_effects_state_artifact: &SideEffectsStateArtifact,
named_chunk_ids_artifact: &ChunkNamedIdArtifact,
exports_info_artifact: &ExportsInfoArtifact,
) -> String {
if let Some(name) = named_chunk_ids_artifact.chunk_long_names.get(&chunk.ukey()) {
return name.clone();
}
let modules = chunk_graph
.get_chunk_root_modules(
&chunk.ukey(),
module_graph,
module_graph_cache,
side_effects_state_artifact,
exports_info_artifact,
)
.iter()
.map(|id| {
module_graph
.module_by_identifier(id)
.expect("Module not found")
})
.collect::<Vec<_>>();
let short_module_names = modules
.iter()
.map(|m| request_to_id(&get_short_module_name(m, context)))
.collect::<Vec<_>>();
let long_module_names = modules
.iter()
.map(|m| request_to_id(&get_long_module_name("", m, context)))
.collect::<Vec<_>>();
let mut id_name_hints = chunk.id_name_hints().iter().cloned().collect::<Vec<_>>();
id_name_hints.sort_unstable();
let chunk_name = {
id_name_hints.extend(short_module_names);
id_name_hints.extend(long_module_names);
id_name_hints.join(delimiter)
};
shorten_long_string(chunk_name, delimiter)
}
pub fn get_full_chunk_name(
chunk: &Chunk,
chunk_graph: &ChunkGraph,
module_graph: &ModuleGraph,
module_graph_cache: &ModuleGraphCacheArtifact,
side_effects_state_artifact: &SideEffectsStateArtifact,
context: &str,
exports_info_artifact: &ExportsInfoArtifact,
) -> String {
if let Some(name) = chunk.name() {
return name.to_owned();
}
let full_module_names = chunk_graph
.get_chunk_root_modules(
&chunk.ukey(),
module_graph,
module_graph_cache,
side_effects_state_artifact,
exports_info_artifact,
)
.iter()
.filter_map(|id| module_graph.module_by_identifier(id))
.map(|module| get_full_module_name(module, context))
.collect::<Vec<_>>();
full_module_names.join(",")
}
pub use rspack_util::identifier::request_to_id;
pub fn get_used_chunk_ids(chunk_by_ukey: &ChunkByUkey) -> FxHashSet<String> {
let mut used_ids = FxHashSet::default();
for chunk in chunk_by_ukey.values() {
if let Some(id) = chunk.id() {
used_ids.insert(id.to_string());
}
}
used_ids
}
pub fn assign_ascending_chunk_ids(chunks: &[ChunkUkey], chunk_by_ukey: &mut ChunkByUkey) {
let used_ids = get_used_chunk_ids(chunk_by_ukey);
let mut next_id = 0;
if !used_ids.is_empty() {
for chunk in chunks {
let chunk = chunk_by_ukey.expect_get_mut(chunk);
if chunk.id().is_none() {
while used_ids.contains(&next_id.to_string()) {
next_id += 1;
}
chunk.set_id(next_id.to_string());
next_id += 1;
}
}
} else {
for chunk in chunks {
let chunk = chunk_by_ukey.expect_get_mut(chunk);
if chunk.id().is_none() {
chunk.set_id(next_id.to_string());
next_id += 1;
}
}
}
}
#[derive(Default)]
pub struct NaturalChunkCompareCache<'a> {
ordered_chunk_modules: FxHashMap<ChunkUkey, Vec<Option<&'a str>>>,
sorted_chunk_groups: FxHashMap<ChunkUkey, Vec<ChunkGroupUkey>>,
}
fn compare_chunks_by_modules<'a>(
chunk_graph: &ChunkGraph,
module_ids: &'a ModuleIdsArtifact,
a: &Chunk,
b: &Chunk,
cache: &mut NaturalChunkCompareCache<'a>,
) -> Ordering {
let a_ukey = a.ukey();
let b_ukey = b.ukey();
cache
.ordered_chunk_modules
.entry(a_ukey)
.or_insert_with(|| {
chunk_graph
.get_ordered_chunk_modules_identifier(&a_ukey)
.into_iter()
.map(|m| ChunkGraph::get_module_id(module_ids, m).map(|s| s.as_str()))
.collect_vec()
});
cache
.ordered_chunk_modules
.entry(b_ukey)
.or_insert_with(|| {
chunk_graph
.get_ordered_chunk_modules_identifier(&b_ukey)
.into_iter()
.map(|m| ChunkGraph::get_module_id(module_ids, m).map(|s| s.as_str()))
.collect_vec()
});
let a_modules = cache
.ordered_chunk_modules
.get(&a_ukey)
.expect("should cache ordered chunk modules");
let b_modules = cache
.ordered_chunk_modules
.get(&b_ukey)
.expect("should cache ordered chunk modules");
a_modules
.iter()
.zip_longest(b_modules.iter())
.find_map(|pair| match pair {
Both(a_module_id, b_module_id) => {
let ordering = compare_ids(
a_module_id.unwrap_or_default(),
b_module_id.unwrap_or_default(),
);
if ordering != Ordering::Equal {
return Some(ordering);
}
None
}
Left(_) => Some(Ordering::Greater),
Right(_) => Some(Ordering::Less),
})
.unwrap_or(Ordering::Equal)
}
fn compare_chunks_by_groups(
chunk_group_by_ukey: &ChunkGroupByUkey,
a: &Chunk,
b: &Chunk,
cache: &mut NaturalChunkCompareCache<'_>,
) -> Ordering {
let a_ukey = a.ukey();
let b_ukey = b.ukey();
cache.sorted_chunk_groups.entry(a_ukey).or_insert_with(|| {
a.get_sorted_groups_iter(chunk_group_by_ukey)
.copied()
.collect_vec()
});
cache.sorted_chunk_groups.entry(b_ukey).or_insert_with(|| {
b.get_sorted_groups_iter(chunk_group_by_ukey)
.copied()
.collect_vec()
});
let a_groups = cache
.sorted_chunk_groups
.get(&a_ukey)
.expect("should cache sorted chunk groups");
let b_groups = cache
.sorted_chunk_groups
.get(&b_ukey)
.expect("should cache sorted chunk groups");
let groups_ordering = a_groups
.iter()
.map(|group| chunk_group_by_ukey.expect_get(group).index)
.zip_longest(
b_groups
.iter()
.map(|group| chunk_group_by_ukey.expect_get(group).index),
)
.find_map(|pair| match pair {
Both(a_group_index, b_group_index) => {
let ordering = a_group_index.cmp(&b_group_index);
if ordering != Ordering::Equal {
Some(ordering)
} else {
None
}
}
Left(_) => Some(Ordering::Greater),
Right(_) => Some(Ordering::Less),
})
.unwrap_or(Ordering::Equal);
if groups_ordering != Ordering::Equal {
return groups_ordering;
}
let Some(group) = a_groups.first() else {
return Ordering::Equal;
};
let group = chunk_group_by_ukey.expect_get(group);
let a_in_children = group
.chunks
.iter()
.find_position(|child| **child == a.ukey());
let b_in_children = group
.chunks
.iter()
.find_position(|child| **child == b.ukey());
a_in_children.cmp(&b_in_children)
}
pub fn compare_chunks_natural<'a>(
chunk_graph: &ChunkGraph,
chunk_group_by_ukey: &ChunkGroupByUkey,
module_ids: &'a ModuleIdsArtifact,
a: &Chunk,
b: &Chunk,
cache: &mut NaturalChunkCompareCache<'a>,
) -> Ordering {
let name_ordering = compare_ids(a.name().unwrap_or_default(), b.name().unwrap_or_default());
if name_ordering != Ordering::Equal {
return name_ordering;
}
let runtime_ordering = compare_runtime(a.runtime(), b.runtime());
if runtime_ordering != Ordering::Equal {
return runtime_ordering;
}
let modules_ordering = compare_chunks_by_modules(chunk_graph, module_ids, a, b, cache);
if modules_ordering != Ordering::Equal {
return modules_ordering;
}
compare_chunks_by_groups(chunk_group_by_ukey, a, b, cache)
}