From 916afd041cf5d8cda7e70e8f7a432e980837d562 Mon Sep 17 00:00:00 2001 From: scx1332 Date: Wed, 17 Jul 2024 14:32:23 +0200 Subject: [PATCH] working on new version --- .github/workflows/ci.yml | 4 +- src/main.rs | 11 +++-- src/ops.rs | 100 +++++++++++++++++++++++++-------------- src/plan.rs | 60 ++++++++++++++++++----- 4 files changed, 122 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72d77f5..3642998 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,8 +29,8 @@ jobs: sudo mount -t tmpfs -o size=1560M tmpfs tmp cp test.tar.zstd tmp cd tmp - cat-once --file test.tar.zstd --safety-time 1 > test2.tar.zstd - cat-once --file test2.tar.zstd --safety-time 1 | tar -I zstd -xf - -C . + cat-once --file test.tar.zstd > test2.tar.zstd + cat-once --file test2.tar.zstd | tar -I zstd -xf - -C . sha1sum test.file > ../test-out.file.sha1 - name: Compare checksums diff --git a/src/main.rs b/src/main.rs index daa06af..ce055a1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,10 @@ struct Args { #[clap(short, long, default_value = "50000")] chunk_size: u64, + /// Dry run, do not perform any operations + #[clap(long)] + dry_run: bool, + #[clap(long)] test_create_zero_file_size: Option, @@ -36,15 +40,14 @@ struct Args { test_random: bool, } -fn cat_file(file_path: &str, chunk_size: u64) -> anyhow::Result<()> { +fn cat_file(file_path: &str, chunk_size: u64, dry_run: bool) -> anyhow::Result<()> { let file_size = std::fs::metadata(file_path)?.len(); let chunk_size = std::cmp::min(file_size, chunk_size) as u64; let plan = plan_chunks(chunk_size, file_size).unwrap(); let operations = plan_into_realization(plan).unwrap(); - commit_plan(file_path, &operations) + commit_plan(file_path, &operations, dry_run) } - fn main() -> anyhow::Result<()> { env::set_var( "RUST_LOG", @@ -80,7 +83,7 @@ fn main() -> anyhow::Result<()> { } if !test_run { - return cat_file(&args.file, args.chunk_size); + return cat_file(&args.file, args.chunk_size, args.dry_run); } Ok(()) } diff --git a/src/ops.rs b/src/ops.rs index f7e96af..3fc50e7 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -1,42 +1,46 @@ -use anyhow::{bail}; +use anyhow::bail; use rand::distributions::{Alphanumeric, DistString}; use rand::{thread_rng, Rng}; use std::fs::OpenOptions; use std::io::{Read, Seek, SeekFrom, Write}; fn truncate_file_int(file_path: &str, target_size: u64) -> anyhow::Result<()> { - //1 open file - let mut file = OpenOptions::new() - .write(true) - .truncate(false) - .open(file_path)?; + if target_size == 0 { + std::fs::remove_file(file_path)?; + Ok(()) + } else { + let mut file = OpenOptions::new() + .write(true) + .truncate(false) + .open(file_path)?; - let file_size = file.seek(SeekFrom::End(0))?; + let file_size = file.seek(SeekFrom::End(0))?; - if file_size < target_size { - bail!( - "File size is already smaller than target size {} vs {}", - file_size, + if file_size < target_size { + bail!( + "File size is already smaller than target size {} vs {}", + file_size, + target_size + ); + } + if file_size == target_size { + log::debug!("File size is already equal to target size {}", file_size); + return Ok(()); + } + log::debug!( + "Truncating file {} to target size {}", + file_path, target_size ); - } - if file_size == target_size { - log::debug!("File size is already equal to target size {}", file_size); - return Ok(()); - } - log::debug!( - "Truncating file {} to target size {}", - file_path, - target_size - ); - //2 seek to target size - file.seek(SeekFrom::Start(target_size))?; + //2 seek to target size + file.seek(SeekFrom::Start(target_size))?; - //3 truncate file - file.set_len(target_size)?; + //3 truncate file + file.set_len(target_size)?; - Ok(()) + Ok(()) + } } pub fn truncate_file(file_path: &str, target_size: u64) -> anyhow::Result<()> { @@ -143,19 +147,41 @@ fn copy_chunk_int(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow:: bail!("Destination range is invalid {}-{}", dst.0, dst.1); } if src.1 - src.0 != dst.1 - dst.0 { - bail!("Source and destination ranges are not the same size {}-{} {}-{}", src.0, src.1, dst.0, dst.1); + bail!( + "Source and destination ranges are not the same size {}-{} {}-{}", + src.0, + src.1, + dst.0, + dst.1 + ); } //check if chunks are overlapping if ranges_overlap(src, dst) { - bail!("Source and destination ranges overlap {}-{} {}-{}", src.0, src.1, dst.0, dst.1); + bail!( + "Source and destination ranges overlap {}-{} {}-{}", + src.0, + src.1, + dst.0, + dst.1 + ); } let file_size = std::fs::metadata(file_path)?.len(); if src.1 > file_size { - bail!("Source range is out of bounds {}-{} file size {}", src.0, src.1, file_size); + bail!( + "Source range is out of bounds {}-{} file size {}", + src.0, + src.1, + file_size + ); } if dst.1 > file_size { - bail!("Destination range is out of bounds {}-{} file size {}", dst.0, dst.1, file_size); + bail!( + "Destination range is out of bounds {}-{} file size {}", + dst.0, + dst.1, + file_size + ); } //open file for read write @@ -185,8 +211,6 @@ fn copy_chunk_int(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow:: Ok(()) } - - pub fn copy_chunk(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow::Result<()> { match copy_chunk_int(file_path, src, dst) { Ok(_) => Ok(()), @@ -195,16 +219,20 @@ pub fn copy_chunk(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow:: Err(e) } } - } -pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> { +pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> { if data.1 <= data.0 { bail!("Data range is invalid {}-{}", data.0, data.1); } let file_size = std::fs::metadata(file_path)?.len(); if data.1 > file_size { - bail!("Data range is out of bounds {}-{} file size {}", data.0, data.1, file_size); + bail!( + "Data range is out of bounds {}-{} file size {}", + data.0, + data.1, + file_size + ); } //open file for read write @@ -226,7 +254,7 @@ pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> } Ok(()) } -pub fn output_chunk(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> { +pub fn output_chunk(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> { match output_chunk_int(file_path, data) { Ok(_) => Ok(()), Err(e) => { diff --git a/src/plan.rs b/src/plan.rs index 7e9cb2b..d0987c0 100644 --- a/src/plan.rs +++ b/src/plan.rs @@ -1,5 +1,5 @@ -use anyhow::bail; use crate::ops::{copy_chunk, output_chunk, truncate_file}; +use anyhow::bail; pub struct ChunkPlan { chunk_size: u64, @@ -34,23 +34,43 @@ pub struct Operation { pub src_chunk: Option<(u64, u64)>, pub data_chunk: (u64, u64), pub truncate_to: u64, - pub is_middle: bool + pub is_middle: bool, } -pub fn commit_plan(file_path: &str, operations: &[Operation]) -> anyhow::Result<()> { +pub fn commit_plan(file_path: &str, operations: &[Operation], dry_run: bool) -> anyhow::Result<()> { let mut step_no = 0; for op in operations { let middle_msg = if op.is_middle { "(middle) " } else { "" }; - log::info!("{} - {}Output chunk {}-{}", step_no, middle_msg, op.data_chunk.0, op.data_chunk.1); - output_chunk(file_path, op.data_chunk).unwrap(); + log::info!( + "{} - {}Output chunk {}-{}", + step_no, + middle_msg, + op.data_chunk.0, + op.data_chunk.1 + ); + if !dry_run { + output_chunk(file_path, op.data_chunk).unwrap(); + } step_no += 1; if let Some((src_start, src_end)) = op.src_chunk { - log::info!("{} - Copy {} bytes from {}-{} to {}-{}", step_no, src_end - src_start, src_start, src_end, op.data_chunk.0, op.data_chunk.1); - copy_chunk(file_path, (src_start, src_end), op.data_chunk).unwrap(); + log::info!( + "{} - Copy {} bytes from {}-{} to {}-{}", + step_no, + src_end - src_start, + src_start, + src_end, + op.data_chunk.0, + op.data_chunk.1 + ); + if !dry_run { + copy_chunk(file_path, (src_start, src_end), op.data_chunk).unwrap(); + } } step_no += 1; log::info!("{} - Truncate file to {} bytes", step_no, op.truncate_to); - truncate_file(file_path, op.truncate_to).unwrap(); + if !dry_run { + truncate_file(file_path, op.truncate_to).unwrap(); + } step_no += 1; } Ok(()) @@ -60,10 +80,24 @@ pub fn explain_plan(operations: &[Operation]) { let mut step_no = 0; for op in operations { let middle_msg = if op.is_middle { "(middle) " } else { "" }; - log::info!("{} - {}Output chunk {}-{}", step_no, middle_msg, op.data_chunk.0, op.data_chunk.1); + log::info!( + "{} - {}Output chunk {}-{}", + step_no, + middle_msg, + op.data_chunk.0, + op.data_chunk.1 + ); step_no += 1; if let Some((src_start, src_end)) = op.src_chunk { - log::info!("{} - Copy {} bytes from {}-{} to {}-{}", step_no, src_end - src_start, src_start, src_end, op.data_chunk.0, op.data_chunk.1); + log::info!( + "{} - Copy {} bytes from {}-{} to {}-{}", + step_no, + src_end - src_start, + src_start, + src_end, + op.data_chunk.0, + op.data_chunk.1 + ); } step_no += 1; log::info!("{} - Truncate file to {} bytes", step_no, op.truncate_to); @@ -75,7 +109,11 @@ pub fn plan_into_realization(plan: ChunkPlan) -> anyhow::Result> let mut operations = Vec::new(); let mut operation_no = 0; let operation_limit = 1000000; - log::info!("Realizing plan for file size {} and chunk size {}", plan.file_size, plan.chunk_size); + log::info!( + "Realizing plan for file size {} and chunk size {}", + plan.file_size, + plan.chunk_size + ); for i in 0..plan.start_chunks { let dst_chunk_start = i * plan.chunk_size; let dst_chunk_end = dst_chunk_start + plan.chunk_size;