Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: split big chunks based on modules path similarities #8775

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/rspack_plugin_split_chunks/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ impl SplitChunkSizes {
pub fn add_by(&mut self, other: &Self) {
self.combine_with(other, &|a, b| a + b)
}

pub fn subtract_by(&mut self, other: &Self) {
self.combine_with(other, &|a, b| a - b)
}
}

impl Deref for SplitChunkSizes {
Expand Down
114 changes: 92 additions & 22 deletions crates/rspack_plugin_split_chunks/src/plugin/max_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ use regex::Regex;
use rspack_collections::{DatabaseItem, UkeyMap};
use rspack_core::incremental::Mutation;
use rspack_core::{
compare_modules_by_identifier, ChunkUkey, Compilation, CompilerOptions, Module, ModuleIdentifier,
DEFAULT_DELIMITER,
ChunkUkey, Compilation, CompilerOptions, Module, ModuleIdentifier, DEFAULT_DELIMITER,
};
use rspack_error::Result;
use rspack_hash::{RspackHash, RspackHashDigest};
Expand All @@ -28,17 +27,19 @@ struct Group {
nodes: Vec<GroupItem>,
pub size: SplitChunkSizes,
pub key: Option<String>,
pub similarities: Vec<usize>,
}

impl Group {
fn new(items: Vec<GroupItem>, key: Option<String>) -> Self {
fn new(items: Vec<GroupItem>, key: Option<String>, similarities: Vec<usize>) -> Self {
let mut summed_size = SplitChunkSizes::empty();
items.iter().for_each(|item| summed_size.add_by(&item.size));

Self {
nodes: items,
size: summed_size,
key,
similarities,
}
}
}
Expand Down Expand Up @@ -84,21 +85,19 @@ fn deterministic_grouping_for_modules(
) -> Vec<Group> {
let mut results: Vec<Group> = Default::default();
let module_graph = compilation.get_module_graph();
let mut items = compilation
let items = compilation
.chunk_graph
.get_chunk_modules(chunk, &module_graph);

items.sort_unstable_by(|a, b| compare_modules_by_identifier(a, b));

let context = compilation.options.context.as_ref();

let nodes = items.into_iter().map(|module| {
let module: &dyn Module = &**module;
let name: String = if module.name_for_condition().is_some() {
make_paths_relative(context, module.identifier().as_str())
let name: String = if let Some(name_for_condition) = module.name_for_condition() {
make_paths_relative(context, &name_for_condition)
} else {
let path = make_paths_relative(context, module.identifier().as_str());
REPLACE_MODULE_IDENTIFIER_REG
.replace_all(&module.identifier(), "")
.replace_all(&path, "")
.to_string()
};
let key = format!(
Expand All @@ -114,7 +113,7 @@ fn deterministic_grouping_for_modules(
}
});

let initial_nodes = nodes
let mut initial_nodes = nodes
.into_iter()
.filter_map(|node| {
// The Module itself is already bigger than `allow_max_size`, we will create a chunk
Expand All @@ -127,16 +126,19 @@ fn deterministic_grouping_for_modules(
allow_max_size
);
let key = node.key.clone();
results.push(Group::new(vec![node], Some(key)));
results.push(Group::new(vec![node], Some(key), vec![]));
None
} else {
Some(node)
}
})
.collect::<Vec<_>>();

initial_nodes.sort_by(|a, b| a.key.cmp(&b.key));

if !initial_nodes.is_empty() {
let initial_group = Group::new(initial_nodes, None);
let similarities = get_similarities(&initial_nodes);
let initial_group = Group::new(initial_nodes, None, similarities);

let mut queue = vec![initial_group];

Expand All @@ -159,16 +161,17 @@ fn deterministic_grouping_for_modules(
left += 1;
}

let mut right = group.nodes.len() - 2;
let mut right: i32 = group.nodes.len() as i32 - 2;
let mut right_size = SplitChunkSizes::empty();
right_size.add_by(&group.nodes[right + 1].size);
while right != 0 && right_size.smaller_than(min_size) {
right_size.add_by(&group.nodes[right].size);
right_size.add_by(&group.nodes[right as usize + 1].size);

right = right.saturating_sub(1);
while right >= 0 && right_size.smaller_than(min_size) {
right_size.add_by(&group.nodes[right as usize].size);

right -= 1;
}

if left - 1 > right {
if left - 1 > right as usize {
// There are overlaps

// TODO(hyf0): There are some algorithms we could do better in this
Expand All @@ -182,11 +185,53 @@ fn deterministic_grouping_for_modules(
results.push(group);
continue;
} else {
let mut pos = left;
let mut best = -1;
let mut best_similarity = usize::MAX;
right_size = group.nodes.iter().rev().take(group.nodes.len() - pos).fold(
SplitChunkSizes::empty(),
|mut acc, node| {
acc.add_by(&node.size);
acc
},
);

while pos <= right as usize + 1 {
let similarity = group.similarities[pos - 1];
if similarity < best_similarity
&& left_size.bigger_than(min_size)
&& right_size.bigger_than(min_size)
{
best_similarity = similarity;
best = pos as i32;
}
let size = &group.nodes[pos].size;
left_size.add_by(size);
right_size.subtract_by(size);
pos += 1;
}

if best == -1 {
results.push(group);
continue;
}

left = best as usize;
right = best - 1;

let mut right_similarities = vec![];
for i in right as usize + 2..group.nodes.len() {
right_similarities.push((group.similarities)[i - 1]);
}

let mut left_similarities = vec![];
for i in 1..left {
left_similarities.push((group.similarities)[i - 1]);
}
let right_nodes = group.nodes.split_off(left);
let left_nodes = group.nodes;

queue.push(Group::new(right_nodes, None));
queue.push(Group::new(left_nodes, None));
queue.push(Group::new(right_nodes, None, right_similarities));
queue.push(Group::new(left_nodes, None, left_similarities));
}
}
}
Expand All @@ -204,6 +249,31 @@ struct ChunkWithSizeInfo<'a> {
pub automatic_name_delimiter: &'a String,
}

fn get_similarities(nodes: &[GroupItem]) -> Vec<usize> {
let mut similarities = Vec::with_capacity(nodes.len());
let mut nodes = nodes.iter();
let Some(mut last) = nodes.next() else {
return similarities;
};

for node in nodes {
similarities.push(similarity(&last.key, &node.key));
last = node;
}

similarities
}

fn similarity(a: &str, b: &str) -> usize {
let mut a = a.chars();
let mut b = b.chars();
let mut dist = 0;
while let (Some(ca), Some(cb)) = (a.next(), b.next()) {
dist += std::cmp::max(0, 10 - (ca as i32 - cb as i32).abs());
}
dist as usize
}

impl SplitChunksPlugin {
/// Affected by `splitChunks.minSize`/`splitChunks.cacheGroups.{cacheGroup}.minSize`
#[tracing::instrument(skip_all)]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/** @type {import("@rspack/core").Configuration} */
module.exports = {
target: 'node',
entry: "./src/index.js",
output: {
filename: '[name].js'
},
optimization: {
chunkIds: 'named',
moduleIds: 'named',
splitChunks: {
chunks: "all",
cacheGroups: {
fragment: {
minChunks: 1,
maxSize: 200 * 1024,
priority: 10,
}
}
}
}
};
Loading
Loading