Skip to content

Commit

Permalink
Merge pull request #365 from serpent-os/fix/lulz-we-didnt-dedupe
Browse files Browse the repository at this point in the history
Actually dedupe content payload
  • Loading branch information
ikeycode authored Nov 4, 2024
2 parents 314f354 + d265b2c commit 3c96dc8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
8 changes: 8 additions & 0 deletions boulder/src/package/collect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ impl PathInfo {
matches!(self.layout.entry, layout::Entry::Regular(_, _))
}

/// Returns the hash recorded for this path when its layout entry is a
/// regular file, or `None` for every other kind of entry.
pub fn file_hash(&self) -> Option<u128> {
    match &self.layout.entry {
        // Only the `Regular` variant carries a hash in its first field.
        layout::Entry::Regular(digest, _) => Some(*digest),
        _ => None,
    }
}

pub fn file_name(&self) -> &str {
self.target_path
.file_name()
Expand Down
22 changes: 14 additions & 8 deletions boulder/src/package/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,21 @@ pub fn emit(paths: &Paths, recipe: &Recipe, packages: &[Package]) -> Result<(),
fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
let filename = package.filename();

// Sort all files by size, largest to smallest
let sorted_files = package
// Filter for all files -> dedupe by hash -> sort largest to smallest
let files = package
.analysis
.paths
.iter()
.filter(|p| p.is_file())
.sorted_by(|a, b| a.size.cmp(&b.size).reverse())
// Filter by file
.filter_map(|info| info.file_hash().map(|hash| (hash, info)))
// Dedupe by hash
.unique_by(|(hash, _)| *hash)
// Sort largest to smallest
.sorted_by(|(_, a), (_, b)| a.size.cmp(&b.size).reverse())
.map(|(_, info)| info)
.collect::<Vec<_>>();
let total_file_size = sorted_files.iter().map(|p| p.size).sum();

let total_file_size = files.iter().map(|info| info.size).sum();

let pb = ProgressBar::new(total_file_size)
.with_message(format!("Generating {filename}"))
Expand Down Expand Up @@ -188,7 +194,7 @@ fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
}

// Only add content payload if we have some files
if !sorted_files.is_empty() {
if !files.is_empty() {
// Temp file for building content payload
let temp_content_path = format!("/tmp/{}.tmp", &filename);
let mut temp_content = fs::OpenOptions::new()
Expand All @@ -201,8 +207,8 @@ fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
let mut writer =
writer.with_content(&mut temp_content, Some(total_file_size), util::num_cpus().get() as u32)?;

for file in sorted_files {
let file = File::open(&file.path)?;
for info in files {
let file = File::open(&info.path)?;
writer.add_content(&mut pb.wrap_read(&file))?;
}

Expand Down

0 comments on commit 3c96dc8

Please sign in to comment.