From 781f66883977e37a849f228ee1dffddbb7acdf32 Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Wed, 26 Jun 2024 14:27:14 +0200
Subject: [PATCH] FEAT: add untar (#3)

* FEAT: add untar

* Use meta_executable
---
Review note: src/untar/script.sh and src/untar/test.sh were amended after the
commit above was created, so the blob ids on their "index" lines are stale.

 CHANGELOG                 |   5 +-
 src/untar/config.vsh.yaml |  43 ++++++++++
 src/untar/script.sh       |  52 ++++++++++++
 src/untar/test.sh         | 163 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 262 insertions(+), 1 deletion(-)
 create mode 100644 src/untar/config.vsh.yaml
 create mode 100644 src/untar/script.sh
 create mode 100644 src/untar/test.sh

diff --git a/CHANGELOG b/CHANGELOG
index c9616a3..4630af0 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,4 +2,7 @@
 
 ## NEW FEATURES
 
-* `csv2fasta`: Convert two columns from a CSV file to FASTA entries (PR #1).
\ No newline at end of file
+* `csv2fasta`: Convert two columns from a CSV file to FASTA entries (PR #1).
+
+* `untar`: Unpack a .tar file. When the contents of the .tar file is just a single directory,
+  put the contents of the directory into the output folder instead of that directory (PR #3).
\ No newline at end of file
diff --git a/src/untar/config.vsh.yaml b/src/untar/config.vsh.yaml
new file mode 100644
index 0000000..727ed61
--- /dev/null
+++ b/src/untar/config.vsh.yaml
@@ -0,0 +1,43 @@
+name: untar
+description: |
+  Unpack a .tar file. When the contents of the .tar file is just a single directory,
+  put the contents of the directory into the output folder instead of that directory.
+argument_groups:
+  - name: Input arguments
+    arguments:
+      - name: --input
+        description: Tarball file to be unpacked.
+        type: file
+        required: true
+  - name: Output arguments
+    arguments:
+      - name: --output
+        description: Directory to write the contents of the .tar file to.
+        type: file
+        direction: output
+        required: true
+  - name: "Other arguments"
+    arguments:
+      - name: "--exclude"
+        alternatives: ["-e"]
+        type: string
+        description: Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted.
+        example: "docs/figures"
+        required: false
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/untar/script.sh b/src/untar/script.sh
new file mode 100644
index 0000000..253ff47
--- /dev/null
+++ b/src/untar/script.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Unpack the tarball "$par_input" into the directory "$par_output".
+# When the archive holds exactly one top-level directory and nothing else, the
+# contents of that directory are written to "$par_output" directly.
+# "$par_exclude" (optional) is handed to tar as an --exclude pattern.
+
+set -eo pipefail
+
+extra_args=()
+
+TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX")
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+
+# Check if tarball contains 1 top-level directory. If so, extract the contents of the
+# directory to the output directory instead of the directory itself.
+echo "Directory contents:"
+tar -taf "${par_input}" > "$TMPDIR/tar_contents.txt"
+cat "$TMPDIR/tar_contents.txt"
+
+printf "Checking if tarball contains only a single top-level directory: "
+# tar silently skips members that have fewer path components than --strip-components,
+# so a top-level file next to the directory would be lost; never strip in that case.
+if [[ $(grep -o -E '^[./]*[^/]+/$' "$TMPDIR/tar_contents.txt" | uniq | wc -l) -eq 1 ]] \
+  && ! grep -q -E '^(\./)*[^/]+$' "$TMPDIR/tar_contents.txt"; then
+  echo "It does."
+  echo "Extracting the contents of the top-level directory to the output directory instead of the directory itself."
+  # The directory can be both of the format './<directory>' (or '././<directory>') or just '<directory>'.
+  # Adjust the number of stripped components accordingly by looking for './' at the beginning of the file.
+  # The dot is escaped: an unescaped '.' also matches one-character names such as 'a/'.
+  starting_relative=$(grep -oP -m 1 '^(\./)*' "$TMPDIR/tar_contents.txt" | tr -d '\n' | wc -c)
+  n_strips=$(( (starting_relative / 2) + 1 ))
+  extra_args+=("--strip-components=$n_strips")
+else
+  echo "It does not."
+fi
+
+if [[ -n "$par_exclude" ]]; then
+  echo "Exclusion of files with wildcard '$par_exclude' requested."
+  extra_args+=("--exclude=$par_exclude")
+fi
+
+echo "Starting extraction of tarball '$par_input' to output directory '$par_output'."
+mkdir -p "$par_output"
+echo "executing 'tar --no-same-owner --no-same-permissions --directory=$par_output ${extra_args[*]} -xavf $par_input'"
+# Quote the array expansion so that an exclude pattern containing spaces or glob
+# characters reaches tar as one unmodified argument.
+tar --no-same-owner --no-same-permissions --directory="$par_output" "${extra_args[@]}" -xavf "$par_input"
+
diff --git a/src/untar/test.sh b/src/untar/test.sh
new file mode 100644
index 0000000..def7ebd
--- /dev/null
+++ b/src/untar/test.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+# create tempdir
+echo ">>> Creating temporary test directory."
+TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX")
+function clean_up {
+  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
+}
+trap clean_up EXIT
+echo ">>> Created temporary directory '$TMPDIR'."
+
+INPUT_FILE="$TMPDIR/test_file.txt"
+echo ">>> Creating test input file at '$TMPDIR/test_file.txt'."
+echo "foo" > "$INPUT_FILE"
+echo ">>> Created '$INPUT_FILE'."
+
+echo ">>> Creating tar.gz from '$INPUT_FILE'."
+TARFILE="${INPUT_FILE}.tar.gz"
+tar -C "$TMPDIR" -czvf "$TARFILE" "$(basename "$INPUT_FILE")"
+[[ ! -f "$TARFILE" ]] && echo ">>> Test setup failed: could not create tarfile." && exit 1
+echo ">>> '$TARFILE' created."
+
+echo ">>> Check whether tar.gz can be extracted"
+echo ">>> Creating temporary output directory for test 1."
+OUTPUT_DIR_1="$TMPDIR/output_test_1/"
+mkdir "$OUTPUT_DIR_1"
+
+echo ">>> Extracting '$TARFILE' to '$OUTPUT_DIR_1'".
+$meta_executable \
+  --input "$TARFILE" \
+  --output "$OUTPUT_DIR_1"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_1/test_file.txt" ]] && echo "Output file could not be found. Output directory contents: " && ls "$OUTPUT_DIR_1" && exit 1
+
+echo ">>> Creating temporary output directory for test 2."
+OUTPUT_DIR_2="$TMPDIR/output_test_2/"
+mkdir "$OUTPUT_DIR_2"
+
+echo ">>> Extracting '$TARFILE' to '$OUTPUT_DIR_2', excluding 'test_file.txt'".
+$meta_executable \
+  --input "$TARFILE" \
+  --output "$OUTPUT_DIR_2" \
+  --exclude 'test_file.txt'
+
+echo ">>> Check whether excluded file was not extracted"
+[[ -f "$OUTPUT_DIR_2/test_file.txt" ]] && echo "File should have been excluded! Output directory contents:" && ls "$OUTPUT_DIR_2" && exit 1
+
+echo ">>> Creating test tarball containing only 1 top-level directory."
+mkdir "$TMPDIR/input_test_3/"
+cp "$INPUT_FILE" "$TMPDIR/input_test_3/"
+tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_3.tar.gz" "$(basename "$TMPDIR/input_test_3")"
+TARFILE_3="$TMPDIR/input_test_3.tar.gz"
+
+echo ">>> Creating temporary output directory for test 3."
+OUTPUT_DIR_3="$TMPDIR/output_test_3/"
+mkdir "$OUTPUT_DIR_3"
+
+echo "Extracting '$TARFILE_3' to '$OUTPUT_DIR_3'".
+$meta_executable \
+  --input "$TARFILE_3" \
+  --output "$OUTPUT_DIR_3"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_3/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+
+echo ">>> Check for tar archive that contains a single directory starting with './'."
+mkdir "$TMPDIR/input_test_4/"
+cp "$INPUT_FILE" "$TMPDIR/input_test_4/"
+
+pushd "$TMPDIR/"
+trap popd ERR
+tar -czvf "$TMPDIR/input_test_4.tar.gz" ./input_test_4
+popd
+trap - ERR
+
+OUTPUT_DIR_4="$TMPDIR/output_test_4/"
+echo "Extracting '$TMPDIR/input_test_4.tar.gz' to '$OUTPUT_DIR_4'".
+$meta_executable \
+  --input "$TMPDIR/input_test_4.tar.gz" \
+  --output "$OUTPUT_DIR_4"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_4/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+
+echo ">>> Creating test tarball containing only 1 top-level directory, but it is nested."
+mkdir -p "$TMPDIR/input_test_5/nested/"
+cp "$INPUT_FILE" "$TMPDIR/input_test_5/nested/"
+tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_5.tar.gz" "$(basename "$TMPDIR/input_test_5")"
+TARFILE_5="$TMPDIR/input_test_5.tar.gz"
+
+echo ">>> Creating temporary output directory for test 5."
+OUTPUT_DIR_5="$TMPDIR/output_test_5/"
+mkdir "$OUTPUT_DIR_5"
+
+echo "Extracting '$TARFILE_5' to '$OUTPUT_DIR_5'".
+$meta_executable \
+  --input "$TARFILE_5" \
+  --output "$OUTPUT_DIR_5"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_5/nested/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+
+echo ">>> Creating test tarball containing two top-level directories."
+mkdir -p "$TMPDIR/input_test_6/number_1/"
+mkdir "$TMPDIR/input_test_6/number_2/"
+cp "$INPUT_FILE" "$TMPDIR/input_test_6/number_1/"
+tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_6.tar.gz" "$(basename "$TMPDIR/input_test_6")"
+TARFILE_6="$TMPDIR/input_test_6.tar.gz"
+
+echo ">>> Creating temporary output directory for test 6."
+OUTPUT_DIR_6="$TMPDIR/output_test_6/"
+mkdir "$OUTPUT_DIR_6"
+
+echo "Extracting '$TARFILE_6' to '$OUTPUT_DIR_6'".
+$meta_executable \
+  --input "$TARFILE_6" \
+  --output "$OUTPUT_DIR_6"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_6/number_1/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+[[ ! -d "$OUTPUT_DIR_6/number_2" ]] && echo "Output directory could not be found!" && exit 1
+
+echo ">>> Creating test tarball containing a single top-level directory with a one-character name."
+mkdir "$TMPDIR/a/"
+cp "$INPUT_FILE" "$TMPDIR/a/"
+tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_7.tar.gz" a
+TARFILE_7="$TMPDIR/input_test_7.tar.gz"
+
+echo ">>> Creating temporary output directory for test 7."
+OUTPUT_DIR_7="$TMPDIR/output_test_7/"
+mkdir "$OUTPUT_DIR_7"
+
+echo "Extracting '$TARFILE_7' to '$OUTPUT_DIR_7'".
+$meta_executable \
+  --input "$TARFILE_7" \
+  --output "$OUTPUT_DIR_7"
+
+echo ">>> Check whether extracted file exists"
+[[ ! -f "$OUTPUT_DIR_7/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+
+echo ">>> Creating test tarball containing a top-level directory next to a top-level file."
+mkdir "$TMPDIR/input_test_8/"
+cp "$INPUT_FILE" "$TMPDIR/input_test_8/"
+tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_8.tar.gz" input_test_8 "$(basename "$INPUT_FILE")"
+TARFILE_8="$TMPDIR/input_test_8.tar.gz"
+
+echo ">>> Creating temporary output directory for test 8."
+OUTPUT_DIR_8="$TMPDIR/output_test_8/"
+mkdir "$OUTPUT_DIR_8"
+
+echo "Extracting '$TARFILE_8' to '$OUTPUT_DIR_8'".
+$meta_executable \
+  --input "$TARFILE_8" \
+  --output "$OUTPUT_DIR_8"
+
+echo ">>> Check whether the directory and the top-level file were both extracted unchanged"
+[[ ! -f "$OUTPUT_DIR_8/input_test_8/test_file.txt" ]] && echo "Output file could not be found!" && exit 1
+[[ ! -f "$OUTPUT_DIR_8/test_file.txt" ]] && echo "Top-level output file could not be found!" && exit 1
+
+echo ">>> Test finished successfully"