Skip to content

Commit

Permalink
working on new version
Browse files (browse the repository at this point in the history)
  • Loading branch information
scx1332 committed Jul 17, 2024
1 parent 154d89c commit 916afd0
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 53 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ jobs:
sudo mount -t tmpfs -o size=1560M tmpfs tmp
cp test.tar.zstd tmp
cd tmp
cat-once --file test.tar.zstd --safety-time 1 > test2.tar.zstd
cat-once --file test2.tar.zstd --safety-time 1 | tar -I zstd -xf - -C .
cat-once --file test.tar.zstd > test2.tar.zstd
cat-once --file test2.tar.zstd | tar -I zstd -xf - -C .
sha1sum test.file > ../test-out.file.sha1
- name: Compare checksums
Expand Down
11 changes: 7 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ struct Args {
#[clap(short, long, default_value = "50000")]
chunk_size: u64,

/// Dry run, do not perform any operations
#[clap(long)]
dry_run: bool,

#[clap(long)]
test_create_zero_file_size: Option<u64>,

Expand All @@ -36,15 +40,14 @@ struct Args {
test_random: bool,
}

fn cat_file(file_path: &str, chunk_size: u64) -> anyhow::Result<()> {
fn cat_file(file_path: &str, chunk_size: u64, dry_run: bool) -> anyhow::Result<()> {
let file_size = std::fs::metadata(file_path)?.len();
let chunk_size = std::cmp::min(file_size, chunk_size) as u64;
let plan = plan_chunks(chunk_size, file_size).unwrap();
let operations = plan_into_realization(plan).unwrap();
commit_plan(file_path, &operations)
commit_plan(file_path, &operations, dry_run)
}


fn main() -> anyhow::Result<()> {
env::set_var(
"RUST_LOG",
Expand Down Expand Up @@ -80,7 +83,7 @@ fn main() -> anyhow::Result<()> {
}

if !test_run {
return cat_file(&args.file, args.chunk_size);
return cat_file(&args.file, args.chunk_size, args.dry_run);
}
Ok(())
}
100 changes: 64 additions & 36 deletions src/ops.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,46 @@
use anyhow::{bail};
use anyhow::bail;
use rand::distributions::{Alphanumeric, DistString};
use rand::{thread_rng, Rng};
use std::fs::OpenOptions;
use std::io::{Read, Seek, SeekFrom, Write};

fn truncate_file_int(file_path: &str, target_size: u64) -> anyhow::Result<()> {
//1 open file
let mut file = OpenOptions::new()
.write(true)
.truncate(false)
.open(file_path)?;
if target_size == 0 {
std::fs::remove_file(file_path)?;
Ok(())
} else {
let mut file = OpenOptions::new()
.write(true)
.truncate(false)
.open(file_path)?;

let file_size = file.seek(SeekFrom::End(0))?;
let file_size = file.seek(SeekFrom::End(0))?;

if file_size < target_size {
bail!(
"File size is already smaller than target size {} vs {}",
file_size,
if file_size < target_size {
bail!(
"File size is already smaller than target size {} vs {}",
file_size,
target_size
);
}
if file_size == target_size {
log::debug!("File size is already equal to target size {}", file_size);
return Ok(());
}
log::debug!(
"Truncating file {} to target size {}",
file_path,
target_size
);
}
if file_size == target_size {
log::debug!("File size is already equal to target size {}", file_size);
return Ok(());
}
log::debug!(
"Truncating file {} to target size {}",
file_path,
target_size
);

//2 seek to target size
file.seek(SeekFrom::Start(target_size))?;
//2 seek to target size
file.seek(SeekFrom::Start(target_size))?;

//3 truncate file
file.set_len(target_size)?;
//3 truncate file
file.set_len(target_size)?;

Ok(())
Ok(())
}
}

pub fn truncate_file(file_path: &str, target_size: u64) -> anyhow::Result<()> {
Expand Down Expand Up @@ -143,19 +147,41 @@ fn copy_chunk_int(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow::
bail!("Destination range is invalid {}-{}", dst.0, dst.1);
}
if src.1 - src.0 != dst.1 - dst.0 {
bail!("Source and destination ranges are not the same size {}-{} {}-{}", src.0, src.1, dst.0, dst.1);
bail!(
"Source and destination ranges are not the same size {}-{} {}-{}",
src.0,
src.1,
dst.0,
dst.1
);
}

//check if chunks are overlapping
if ranges_overlap(src, dst) {
bail!("Source and destination ranges overlap {}-{} {}-{}", src.0, src.1, dst.0, dst.1);
bail!(
"Source and destination ranges overlap {}-{} {}-{}",
src.0,
src.1,
dst.0,
dst.1
);
}
let file_size = std::fs::metadata(file_path)?.len();
if src.1 > file_size {
bail!("Source range is out of bounds {}-{} file size {}", src.0, src.1, file_size);
bail!(
"Source range is out of bounds {}-{} file size {}",
src.0,
src.1,
file_size
);
}
if dst.1 > file_size {
bail!("Destination range is out of bounds {}-{} file size {}", dst.0, dst.1, file_size);
bail!(
"Destination range is out of bounds {}-{} file size {}",
dst.0,
dst.1,
file_size
);
}

//open file for read write
Expand Down Expand Up @@ -185,8 +211,6 @@ fn copy_chunk_int(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow::
Ok(())
}



pub fn copy_chunk(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow::Result<()> {
match copy_chunk_int(file_path, src, dst) {
Ok(_) => Ok(()),
Expand All @@ -195,16 +219,20 @@ pub fn copy_chunk(file_path: &str, src: (u64, u64), dst: (u64, u64)) -> anyhow::
Err(e)
}
}

}

pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> {
pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> {
if data.1 <= data.0 {
bail!("Data range is invalid {}-{}", data.0, data.1);
}
let file_size = std::fs::metadata(file_path)?.len();
if data.1 > file_size {
bail!("Data range is out of bounds {}-{} file size {}", data.0, data.1, file_size);
bail!(
"Data range is out of bounds {}-{} file size {}",
data.0,
data.1,
file_size
);
}

//open file for read write
Expand All @@ -226,7 +254,7 @@ pub fn output_chunk_int(file_path: &str, data: (u64, u64)) -> anyhow::Result<()>
}
Ok(())
}
pub fn output_chunk(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> {
pub fn output_chunk(file_path: &str, data: (u64, u64)) -> anyhow::Result<()> {
match output_chunk_int(file_path, data) {
Ok(_) => Ok(()),
Err(e) => {
Expand Down
60 changes: 49 additions & 11 deletions src/plan.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::bail;
use crate::ops::{copy_chunk, output_chunk, truncate_file};
use anyhow::bail;

pub struct ChunkPlan {
chunk_size: u64,
Expand Down Expand Up @@ -34,23 +34,43 @@ pub struct Operation {
pub src_chunk: Option<(u64, u64)>,
pub data_chunk: (u64, u64),
pub truncate_to: u64,
pub is_middle: bool
pub is_middle: bool,
}

pub fn commit_plan(file_path: &str, operations: &[Operation]) -> anyhow::Result<()> {
pub fn commit_plan(file_path: &str, operations: &[Operation], dry_run: bool) -> anyhow::Result<()> {
let mut step_no = 0;
for op in operations {
let middle_msg = if op.is_middle { "(middle) " } else { "" };
log::info!("{} - {}Output chunk {}-{}", step_no, middle_msg, op.data_chunk.0, op.data_chunk.1);
output_chunk(file_path, op.data_chunk).unwrap();
log::info!(
"{} - {}Output chunk {}-{}",
step_no,
middle_msg,
op.data_chunk.0,
op.data_chunk.1
);
if !dry_run {
output_chunk(file_path, op.data_chunk).unwrap();
}
step_no += 1;
if let Some((src_start, src_end)) = op.src_chunk {
log::info!("{} - Copy {} bytes from {}-{} to {}-{}", step_no, src_end - src_start, src_start, src_end, op.data_chunk.0, op.data_chunk.1);
copy_chunk(file_path, (src_start, src_end), op.data_chunk).unwrap();
log::info!(
"{} - Copy {} bytes from {}-{} to {}-{}",
step_no,
src_end - src_start,
src_start,
src_end,
op.data_chunk.0,
op.data_chunk.1
);
if !dry_run {
copy_chunk(file_path, (src_start, src_end), op.data_chunk).unwrap();
}
}
step_no += 1;
log::info!("{} - Truncate file to {} bytes", step_no, op.truncate_to);
truncate_file(file_path, op.truncate_to).unwrap();
if !dry_run {
truncate_file(file_path, op.truncate_to).unwrap();
}
step_no += 1;
}
Ok(())
Expand All @@ -60,10 +80,24 @@ pub fn explain_plan(operations: &[Operation]) {
let mut step_no = 0;
for op in operations {
let middle_msg = if op.is_middle { "(middle) " } else { "" };
log::info!("{} - {}Output chunk {}-{}", step_no, middle_msg, op.data_chunk.0, op.data_chunk.1);
log::info!(
"{} - {}Output chunk {}-{}",
step_no,
middle_msg,
op.data_chunk.0,
op.data_chunk.1
);
step_no += 1;
if let Some((src_start, src_end)) = op.src_chunk {
log::info!("{} - Copy {} bytes from {}-{} to {}-{}", step_no, src_end - src_start, src_start, src_end, op.data_chunk.0, op.data_chunk.1);
log::info!(
"{} - Copy {} bytes from {}-{} to {}-{}",
step_no,
src_end - src_start,
src_start,
src_end,
op.data_chunk.0,
op.data_chunk.1
);
}
step_no += 1;
log::info!("{} - Truncate file to {} bytes", step_no, op.truncate_to);
Expand All @@ -75,7 +109,11 @@ pub fn plan_into_realization(plan: ChunkPlan) -> anyhow::Result<Vec<Operation>>
let mut operations = Vec::new();
let mut operation_no = 0;
let operation_limit = 1000000;
log::info!("Realizing plan for file size {} and chunk size {}", plan.file_size, plan.chunk_size);
log::info!(
"Realizing plan for file size {} and chunk size {}",
plan.file_size,
plan.chunk_size
);
for i in 0..plan.start_chunks {
let dst_chunk_start = i * plan.chunk_size;
let dst_chunk_end = dst_chunk_start + plan.chunk_size;
Expand Down

0 comments on commit 916afd0

Please sign in to comment.