Merge pull request #4 from panubo/feature/load-direct-source
Add support for loading from a fully qualified path or timestamp
macropin authored Apr 14, 2023
2 parents 32ba064 + 36931a1 commit a918874
Showing 4 changed files with 124 additions and 24 deletions.
108 changes: 108 additions & 0 deletions commands/common.sh
@@ -63,6 +63,7 @@ get_storage_commands() {
fetch_cmd=( "gsutil" "cp" )
storage_type="gs"
gsutil_auth
find_object="find_object_gs"
;;
s3://*)
echo ">> Storage type: aws s3"
@@ -72,6 +73,7 @@ get_storage_commands() {
ls_cmd=( "aws" "s3" "ls" "${AWS_S3_ADDITIONAL_ARGS}" )
fetch_cmd=( "aws" "s3" "cp" "${AWS_S3_ADDITIONAL_ARGS}" )
storage_type="s3"
find_object="find_object_s3"
;;
file://*|/*|./*)
echo ">> Storage type: file"
@@ -80,6 +82,7 @@ get_storage_commands() {
fetch_cmd=( "cat" )
source="${source#file:\/\/}"
storage_type="file"
find_object="find_object_file"
;;
*)
echoerr "Unknown storage type"
@@ -136,3 +139,108 @@ gsutil_auth() {
"service_account = default" > /etc/boto.cfg
fi
}

# helper functions
function get_filename_from_object_path() {
# Returns just the filename portion of the full object path
echo "${1}" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/'
}

function get_basename_from_object_path() {
# Returns just the bucketname / base path
echo "${1}" | sed 's/\(file\|s3\|gs\):\/\/\([^\/]\+\)\/.*/\1:\/\/\2\//'
}

function get_timestamp_from_object_path() {
# Returns just the timestamp portion (2-14 digits) of the full object path
echo "${1}" | sed -n 's/.*\/\([0-9]\{2,14\}\).*/\1/p; t; q;'
}
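
# Illustrative behaviour of the helpers above for an assumed example path
# (bucket and object names are for illustration only):
#   get_filename_from_object_path  "s3://mybucket/20230413000003/my_database.sql.lz4"  -> my_database.sql.lz4
#   get_basename_from_object_path  "s3://mybucket/20230413000003/my_database.sql.lz4"  -> s3://mybucket/
#   get_timestamp_from_object_path "s3://mybucket/20230413000003/my_database.sql.lz4"  -> 20230413000003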

function check_object_exists() {
if [[ $(eval "${ls_cmd[@]}" "${1}") ]]; then
return 0
else
echoerr "Error: file not found"
return 1
fi
}

function find_object_gs {
# find the object
# the following are all valid
# gs://mybucket/20230413000003/my_database.sql.lz4
# gs://mybucket/20230413000003/ my_database
# gs://mybucket/ my_database
# gs://mybucket/20230413 my_database

source="${1}"
database="${2:-}"
timestamp="$(get_timestamp_from_object_path "${source}")"
base="$(get_basename_from_object_path "${source}")"

if [[ "${timestamp}" == "" ]]; then
# no timestamp in the path, find the latest
timestamp="$(eval "${ls_cmd[@]}" "${source}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)"
full_path="$(eval "${ls_cmd[@]}" "${source}${timestamp}/" | grep "/${database}[\.\-]")"
else
# has timestamp, either fully qualified, or needs expanding
if [[ $source =~ [0-9]{14}/${database} ]]; then
# should be complete path
full_path="${source}"
elif [[ $source =~ [0-9]{14} ]]; then
# complete timestamp
full_path="$(eval "${ls_cmd[@]}" "${source}" | grep "/${database}[\.\-]")"
else
# partial timestamp. search for matching object path
full_path="$(eval "${ls_cmd[@]}" "${base}${timestamp}*/" | grep "/${database}[\.\-]")"
fi
fi
check_object_exists "${full_path}" || exit 1
echo "${full_path}"
}
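
# Usage sketch (assumes ls_cmd has been initialised by get_storage_commands and
# that the bucket contains the illustrative object below); each call prints
# gs://mybucket/20230413000003/my_database.sql.lz4:
#   find_object_gs "gs://mybucket/20230413000003/my_database.sql.lz4"
#   find_object_gs "gs://mybucket/20230413000003/" "my_database"
#   find_object_gs "gs://mybucket/" "my_database"
#   find_object_gs "gs://mybucket/20230413" "my_database"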


function find_object_s3 {
# find the object
# the following are all valid
# s3://mybucket/20230413000003/my_database.sql.lz4
# s3://mybucket/20230413000003/ my_database
# s3://mybucket/ my_database
# s3://mybucket/20230413 my_database

source="${1}"
database="${2:-}"
timestamp="$(get_timestamp_from_object_path "${source}")"
base="$(get_basename_from_object_path "${source}")"

if [[ "${timestamp}" == "" ]]; then
# no timestamp in the path, find the latest
timestamp="$(eval "${ls_cmd[@]}" "${base}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)"
file="$(eval "${ls_cmd[@]}" "${base}${timestamp}/" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${database}[\.\-]")"
full_path="${base}${timestamp}/${file}"
else
# has timestamp, either fully qualified, or needs expanding
if [[ $source =~ [0-9]{14}/${database} ]]; then
# should be complete path
full_path="${source}"
elif [[ $source =~ [0-9]{14} ]]; then
# complete timestamp
file="$(eval "${ls_cmd[@]}" "${source}" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${database}[\.\-]")"
full_path="${source}${file}"
else
# partial timestamp. search for matching object path
timestamp="$(eval "${ls_cmd[@]}" "${base}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | grep "${timestamp}")"
timestamp_count=$(wc -l <<<"${timestamp}")
[[ "${timestamp_count}" -gt 1 ]] && { echoerr "Error too many items found. Timestamp is not distinct."; exit 1; }
file="$(eval "${ls_cmd[@]}" "${base}${timestamp}/" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${database}[\.\-]")"
full_path="${base}${timestamp}/${file}"
fi
fi
check_object_exists "${full_path}" || exit 1
echo "${full_path}"
}
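
# Note: "aws s3 ls" prints keys relative to the queried prefix, while "gsutil ls"
# prints fully qualified URLs, which is why this variant reassembles full_path
# from base, timestamp and file. Illustrative output (assumed bucket contents):
#   $ aws s3 ls s3://mybucket/
#                              PRE 20230413000003/
#   $ aws s3 ls s3://mybucket/20230413000003/
#   2023-04-13 00:00:03    1234 my_database.sql.lz4
#   $ gsutil ls gs://mybucket/20230413000003/
#   gs://mybucket/20230413000003/my_database.sql.lz4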

function find_object_file {
echoerr "find_object_file not implemented"
exit 1
}
28 changes: 11 additions & 17 deletions commands/load
@@ -208,30 +208,24 @@ case "${#args[@]}" in
;;
esac

echo "Source: ${source}"
echo "SRC DB: ${src_database}"
echo "DEST DB: ${dest_database}"
echo "Source: ${source}"
echo "Source Database: ${src_database}"
echo "Destination Database: ${dest_database}"

# Set the umask; it defaults to 0077 to keep files private during db dumping
umask "${umask:-0077}"

date="$(date --utc "+${date_format:-%Y%m%d%H%M%S}")"

get_storage_commands "${source}"

# Find latest
latest="$(eval "${ls_cmd[@]}" "${source}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)"
echo "Latest: ${latest}"

file="$(eval "${ls_cmd[@]}" "${source}/${latest}/" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${src_database}[\.\-]" || true)"
if [[ -z "${file}" ]]; then
echo "No save found for database in ${latest}"
file_path="$(eval ${find_object} "${source}" "${src_database}")"
if [[ -z "${file_path}" ]]; then
echo "No save found for database ${src_database} in ${source}"
exit 1
else
echo "Filepath: ${file_path}"
fi
echo "File: ${file}"

if [[ "${compression:-auto}" == "auto" ]]; then
case "${file##*.}" in
case "${file_path##*.}" in
"lz4")
compression="lz4"
;;
@@ -268,9 +262,9 @@ wait_mariadb "${host}" "${port:-3306}"
/usr/bin/mysql "${connection[@]}" -rs -e "DROP DATABASE IF EXISTS ${dest_database}; CREATE DATABASE ${dest_database}"

if [[ "${#sed_cmd[@]}" -gt 0 ]]; then
eval "${fetch_cmd[@]}" "${source}/${latest}/${file}" "-" | "${decompression_cmd[@]}" | sed -E "${sed_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
eval "${fetch_cmd[@]}" "${file_path}" "-" | "${decompression_cmd[@]}" | sed -E "${sed_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
else
eval "${fetch_cmd[@]}" "${source}/${latest}/${file}" "-" | "${decompression_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
eval "${fetch_cmd[@]}" "${file_path}" "-" | "${decompression_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
fi
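
# For example, with s3 storage and lz4 compression the pipeline above expands to
# roughly the following (a sketch: the object name is illustrative and
# decompression_cmd is assumed to resolve to "lz4 -d"):
#   aws s3 cp s3://backup/20230413000003/mysql.sql.lz4 - | lz4 -d | /usr/bin/mysql "${connection_no_db[@]}" newdb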

echo ">> Finished."
7 changes: 2 additions & 5 deletions commands/load.md
@@ -8,8 +8,5 @@ Command to load a sql dump from object storage (or filesystem) to a destination
* DROP and CREATE destination database
* Support for sed filters
* gsutil auth helper

## Limitations

* Source must be a directory named with the date ie 20200813000000 (must be 14 chars)
* Only the latest is loadable
* Source can be either a bucket root containing timestamped directories named with the date, i.e. 20200813000000 (must be 14 chars); a full path to the dump to restore; or a datestamped path (full or partial datestamp), e.g. `s3://mybucket`, `s3://mybucket/20200813000000`, or `s3://mybucket/20200813000000/my_database.sql.lz4`
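
For example (host and credentials are placeholders, and the object name is illustrative), the accepted source forms map to invocations like:

    load --host mariadb --password password s3://mybucket/ my_database newdb
    load --host mariadb --password password s3://mybucket/20200813 my_database newdb
    load --host mariadb --password password s3://mybucket/20200813000000/my_database.sql.lz4 my_database newdb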
5 changes: 3 additions & 2 deletions tests/test.sh
@@ -65,8 +65,9 @@ docker run -d --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password ${MARIADB_IMAGE}:${MARIADB_TAG} > /dev/null
docker run -d --name minio -p 9000:9000 ${MINIO_IMAGE}:${MINIO_TAG} server /data > /dev/null
docker run --rm -i --link minio -e MC_HOST_minio=http://minioadmin:minioadmin@minio:9000 minio/mc:latest --quiet mb minio/backup
docker run -i --name ${TEST_NAME}-save --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER save --host mariadb --password password s3://backup
docker run -i --name ${TEST_NAME}-load --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/ mysql newdb
cleanup mariadb minio ${TEST_NAME}-save ${TEST_NAME}-load
docker run -i --name ${TEST_NAME}-load1 --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/ mysql newdb
docker run -i --name ${TEST_NAME}-load2 --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/202 mysql newdb
cleanup mariadb minio ${TEST_NAME}-save ${TEST_NAME}-load1 ${TEST_NAME}-load2

echo "=> Test mysql command"
docker run -d --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password ${MARIADB_IMAGE}:${MARIADB_TAG} > /dev/null
