Skip to content

Commit

Permalink
Feature/si memory units (#686)
Browse files Browse the repository at this point in the history
* Use 1000 base memory units and 1024 as additional option

* Add documentation and VDSL3Helper code

* Use the correct units for docker (1024 base)

* Add SI based Nextflow labels and change to use bytes as value to be more explicit

* Switch docker to byte resolution for --memory

* update changelog PR#

* Refactor code to better use an expanded lookup table

* Add extra testbench for computationalRequirements and fix "10M" notation

* Remove unneeded case in match
  • Loading branch information
Grifs authored Apr 24, 2024
1 parent a0249e5 commit ff7d939
Show file tree
Hide file tree
Showing 12 changed files with 266 additions and 65 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

TODO add summary

## BREAKING CHANGES

* `computational requirements`: Use 1000-base units instead of 1024-base units for memory (PR #686). Additionally, the memory units `kib`, `mib`, `gib`, `tib`, and `pib` are added to support 1024-base definitions.

## MINOR CHANGES

* `error message`: Improve the error message when using an invalid field in the config (PR #662). The error message now lists the invalid field names when they can be identified, and otherwise falls back to a more general error message.
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/viash_code_block/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Meta-variables offer information on the runtime environment which you can use fr

* `functionality_name` (string): The name of the component, useful for logging. (Deprecated)

* `memory_*` (long): The maximum amount of memory a component is allowed to allocate. The following denominations are provided: `memory_b`, `memory_kb`, `memory_mb`, `memory_gb`, `memory_tb`, `memory_pb`. By default, this value will be undefined.
* `memory_*` (long): The maximum amount of memory a component is allowed to allocate. The following denominations are provided: `memory_b`, `memory_kb`, `memory_mb`, `memory_gb`, `memory_tb`, `memory_pb` for SI units (1000-base), and `memory_kib`, `memory_mib`, `memory_gib`, `memory_tib`, `memory_pib` for IEC units (1024-base). By default, this value will be undefined.

* `resources_dir` (string): Path to where the resources are stored.

Expand Down
15 changes: 10 additions & 5 deletions src/main/resources/io/viash/runners/nextflow/VDSL3Helper.nf
Original file line number Diff line number Diff line change
Expand Up @@ -274,11 +274,16 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" }
|\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" }
|if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then
| export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
| export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
| export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
| export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
| export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
| export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 ))
| export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 ))
| export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 ))
| export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 ))
| export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 ))
| export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
| export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 ))
| export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 ))
| export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 ))
| export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 ))
|fi
|
|# meta synonyms
Expand Down
21 changes: 6 additions & 15 deletions src/main/scala/io/viash/config/ComputationalRequirements.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ case class ComputationalRequirements(
@description("The maximum number of (logical) cpus a component is allowed to use.")
@example("cpus: 10", "yaml")
cpus: Option[Int] = None,
@description("The maximum amount of memory a component is allowed to allocate. Unit must be one of B, KB, MB, GB, TB or PB.")
@description("The maximum amount of memory a component is allowed to allocate. Unit must be one of B, KB, MB, GB, TB or PB for SI units (1000-base), or KiB, MiB, GiB, TiB or PiB for binary IEC units (1024-base).")
@example("memory: 10GB", "yaml")
memory: Option[String] = None,
@description("A list of commands which should be present on the system for the script to function.")
Expand All @@ -35,25 +35,16 @@ case class ComputationalRequirements(
) {

def memoryAsBytes: Option[BigInt] = {
val Regex = "^([0-9]+) *([kmgtp]b?|b)$".r
val lookup = Map(
"b" -> 0,
"kb" -> 1,
"mb" -> 2,
"gb" -> 3,
"tb" -> 4,
"pb" -> 5
)
val Regex = "^([0-9]+) *([kmgtp]i?b?|b)$".r
val lookup = "bkmgtp"
memory.map(_.toLowerCase()) match {
case Some(Regex(amnt, unit)) =>
val amntBigInt = BigInt(amnt)
val multiplier = BigInt(1024)
val exp = lookup(unit)
Some(amntBigInt * multiplier.pow(exp))
val exp = lookup.indexOf(unit.take(1))
val multiplier = if (unit.contains("i")) 1024 else 1000
Some(BigInt(amnt) * BigInt(multiplier).pow(exp))
case Some(m) =>
throw new RuntimeException(s"Invalid value \"$m\" as memory computational requirement.")
case None => None
case _ => ???
}
}
}
2 changes: 1 addition & 1 deletion src/main/scala/io/viash/config/Config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ case class Config(
"""@[Computational requirements](computational_requirements) related to running the component.
|`cpus` specifies the maximum number of (logical) cpus a component is allowed to use., whereas
|`memory` specifies the maximum amount of memory a component is allowed to allicate. Memory units must be
|in B, KB, MB, GB, TB or PB.""".stripMargin)
|in B, KB, MB, GB, TB or PB for SI units (1000-base), or KiB, MiB, GiB, TiB or PiB for binary IEC units (1024-base).""".stripMargin)
@example(
"""requirements:
| cpus: 5
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/io/viash/functionality/Functionality.scala
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ case class Functionality(
"""@[Computational requirements](computational_requirements) related to running the component.
|`cpus` specifies the maximum number of (logical) cpus a component is allowed to use., whereas
|`memory` specifies the maximum amount of memory a component is allowed to allicate. Memory units must be
|in B, KB, MB, GB, TB or PB.""".stripMargin)
|in B, KB, MB, GB, TB or PB for SI units (1000-base), or KiB, MiB, GiB, TiB or PiB for binary IEC units (1024-base).""".stripMargin)
@example(
"""requirements:
| cpus: 5
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/io/viash/runners/ExecutableRunner.scala
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,8 @@ final case class ExecutableRunner(
s"""
|if [[ "$$VIASH_ENGINE_TYPE" == "docker" ]]; then
| # helper function for filling in extra docker args
| if [ ! -z "$$VIASH_META_MEMORY_MB" ]; then
| VIASH_DOCKER_RUN_ARGS+=("--memory=$${VIASH_META_MEMORY_MB}m")
| if [ ! -z "$$VIASH_META_MEMORY_B" ]; then
| VIASH_DOCKER_RUN_ARGS+=("--memory=$${VIASH_META_MEMORY_B}")
| fi
| if [ ! -z "$$VIASH_META_CPUS" ]; then
| VIASH_DOCKER_RUN_ARGS+=("--cpus=$${VIASH_META_CPUS}")
Expand Down
54 changes: 35 additions & 19 deletions src/main/scala/io/viash/runners/nextflow/NextflowConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,20 @@ case class NextflowConfig(
)
@default("A series of default labels to specify memory and cpu constraints")
labels: ListMap[String, String] = ListMap(
NextflowConfig.binaryIterator
NextflowConfig.logarithmicIterator
.dropWhile(_ < 1 * NextflowConfig.GB)
.takeWhile(_ <= 512 * NextflowConfig.TB)
.takeWhile(_ <= 500 * NextflowConfig.TB)
.map{i =>
val kSize = NextflowConfig.humanReadableByteSize(i, "%1.0f", s => s.stripLeading().toLowerCase()) // "1gb"
val vSize = NextflowConfig.humanReadableByteSize(i, "%1.0f.", s => s.stripLeading()) // "1.GB"
(s"mem$kSize", s"memory = $vSize")
} ++
val kSize = NextflowConfig.humanReadableByteSize(i, "%1.0f", 1000, s => s.stripLeading().toLowerCase()) // "1gb"
(s"mem$kSize", s"memory = $i.B")
} ++
NextflowConfig.binaryIterator
.dropWhile(_ < 1 * NextflowConfig.GiB)
.takeWhile(_ <= 512 * NextflowConfig.TiB)
.map{i =>
val kSize = NextflowConfig.humanReadableByteSize(i, "%1.0f", 1024, s => s.stripLeading().toLowerCase()) // "1gib"
(s"mem$kSize", s"memory = $i.B")
} ++
NextflowConfig.logarithmicIterator
.takeWhile(_ <= 1000)
.map(i => (s"cpu$i", s"cpus = $i")) : _*
Expand All @@ -99,17 +105,23 @@ case class NextflowConfig(

object NextflowConfig {

val KB = 1024L
val MB = 1024L*1024
val GB = 1024L*1024*1024
val TB = 1024L*1024*1024*1024
val PB = 1024L*1024*1024*1024*1024
val EB = 1024L*1024*1024*1024*1024*1024
val KB = 1000L
val MB = 1000L*1000
val GB = 1000L*1000*1000
val TB = 1000L*1000*1000*1000
val PB = 1000L*1000*1000*1000*1000
val EB = 1000L*1000*1000*1000*1000*1000
val KiB = 1024L
val MiB = 1024L*1024
val GiB = 1024L*1024*1024
val TiB = 1024L*1024*1024*1024
val PiB = 1024L*1024*1024*1024*1024
val EiB = 1024L*1024*1024*1024*1024*1024

// Returns 1, 2, 5, 10, 20, 50, 100 ...
def logarithmicIterator: Seq[Int] =
for (i <- Seq.range(0, 9); j <- Seq(1, 2, 5) )
yield j * Math.pow(10, i).toInt
def logarithmicIterator: Seq[Long] =
for (i <- Seq.range(0, 19); j <- Seq(1, 2, 5) )
yield j * Math.pow(10, i).toLong

// Returns 1, 2, 4, 8, 16, 32, ...
def binaryIterator: Seq[Long] =
Expand All @@ -120,12 +132,16 @@ object NextflowConfig {
* @see https://stackoverflow.com/questions/35609587/human-readable-size-units-file-sizes-for-scala-code-like-duration
* Long is limited to 8 ExaByte - 1 byte
*/
def humanReadableByteSize(fileSize: Long, format: String = "%1.2f", unitTranslator: String => String = s => s): String = {
def humanReadableByteSize(fileSize: Long, format: String = "%1.2f", base: Int, unitTranslator: String => String = s => s): String = {
assert(base == 1000 || base == 1024)

if(fileSize <= 0) return "0 B"
val units: Array[String] = Array("B", "KB", "MB", "GB", "TB", "PB", "EB")
val digitGroup: Int = (Math.log10(fileSize.toDouble)/Math.log10(1024)).toInt
val unitsSI: Array[String] = Array("B", "KB", "MB", "GB", "TB", "PB", "EB")
val unitsIEC: Array[String] = Array("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB")
val units = if(base == 1000) unitsSI else unitsIEC
val digitGroup: Int = (Math.log10(fileSize.toDouble)/Math.log10(base)).toInt

val value = String.format(format, fileSize/Math.pow(1024, digitGroup))
val value = String.format(format, fileSize/Math.pow(base, digitGroup))
val unit = unitTranslator(s" ${units(digitGroup)}")

s"$value$unit"
Expand Down
39 changes: 27 additions & 12 deletions src/main/scala/io/viash/wrapper/BashWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ object BashWrapper {
LongArgument("memory_mb", required = false, dest = "meta"),
LongArgument("memory_gb", required = false, dest = "meta"),
LongArgument("memory_tb", required = false, dest = "meta"),
LongArgument("memory_pb", required = false, dest = "meta")
LongArgument("memory_pb", required = false, dest = "meta"),
LongArgument("memory_kib", required = false, dest = "meta"),
LongArgument("memory_mib", required = false, dest = "meta"),
LongArgument("memory_gib", required = false, dest = "meta"),
LongArgument("memory_tib", required = false, dest = "meta"),
LongArgument("memory_pib", required = false, dest = "meta")
)
}

Expand Down Expand Up @@ -735,18 +740,23 @@ object BashWrapper {
"""# helper function for parsing memory strings
|function ViashMemoryAsBytes {
| local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
| local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
| local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$'
| if [[ $memory =~ $memory_regex ]]; then
| local number=${memory/[^0-9]*/}
| local symbol=${memory/*[0-9]/}
|
| case $symbol in
| b) memory_b=$number ;;
| kb|k) memory_b=$(( $number * 1024 )) ;;
| mb|m) memory_b=$(( $number * 1024 * 1024 )) ;;
| gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
| tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
| pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
| kb|k) memory_b=$(( $number * 1000 )) ;;
| mb|m) memory_b=$(( $number * 1000 * 1000 )) ;;
| gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;;
| tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;;
| pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;;
| kib|ki) memory_b=$(( $number * 1024 )) ;;
| mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;;
| gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
| tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
| pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
| esac
| echo "$memory_b"
| fi
Expand All @@ -756,11 +766,16 @@ object BashWrapper {
| VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
| # do not define other variables if memory_b is an empty string
| if [ ! -z "$VIASH_META_MEMORY_B" ]; then
| VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
| VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
| VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
| VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
| VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
| VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 ))
| VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 ))
| VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 ))
| VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 ))
| VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 ))
| VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
| VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 ))
| VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 ))
| VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 ))
| VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 ))
| else
| # unset memory if string is empty
| unset $VIASH_META_MEMORY_B
Expand Down
7 changes: 6 additions & 1 deletion src/test/resources/test_languages/bash/code.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,9 @@ output "meta_memory_kb: |$meta_memory_kb|"
output "meta_memory_mb: |$meta_memory_mb|"
output "meta_memory_gb: |$meta_memory_gb|"
output "meta_memory_tb: |$meta_memory_tb|"
output "meta_memory_pb: |$meta_memory_pb|"
output "meta_memory_pb: |$meta_memory_pb|"
output "meta_memory_kib: |$meta_memory_kib|"
output "meta_memory_mib: |$meta_memory_mib|"
output "meta_memory_gib: |$meta_memory_gib|"
output "meta_memory_tib: |$meta_memory_tib|"
output "meta_memory_pib: |$meta_memory_pib|"
48 changes: 40 additions & 8 deletions src/test/resources/test_languages/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,17 @@ grep -q 'meta_name: |test_languages_.*|' output.txt
grep -q 'meta_functionality_name: |test_languages_.*|' output.txt
grep -q 'meta_resources_dir: |..*|' output.txt
grep -q 'meta_cpus: |2|' output.txt
grep -q 'meta_memory_b: |2147483648|' output.txt
grep -q 'meta_memory_kb: |2097152|' output.txt
grep -q 'meta_memory_mb: |2048|' output.txt
grep -q 'meta_memory_b: |2000000000|' output.txt
grep -q 'meta_memory_kb: |2000000|' output.txt
grep -q 'meta_memory_mb: |2000|' output.txt
grep -q 'meta_memory_gb: |2|' output.txt
grep -q 'meta_memory_tb: |1|' output.txt
grep -q 'meta_memory_pb: |1|' output.txt
grep -q 'meta_memory_kib: |1953125|' output.txt
grep -q 'meta_memory_mib: |1908|' output.txt
grep -q 'meta_memory_gib: |2|' output.txt
grep -q 'meta_memory_tib: |1|' output.txt
grep -q 'meta_memory_pib: |1|' output.txt

grep -q 'head of input: |if you can read this,|' output.txt
grep -q 'head of resource1: |if you can read this,|' output.txt
Expand Down Expand Up @@ -78,17 +83,44 @@ grep -q 'meta_name: |test_languages_.*|' output2.txt
grep -q 'meta_functionality_name: |test_languages_.*|' output2.txt
grep -q 'meta_resources_dir: |..*|' output2.txt
grep -q 'meta_cpus: |666|' output2.txt
grep -q 'meta_memory_b: |112589990684262400|' output2.txt
grep -q 'meta_memory_kb: |109951162777600|' output2.txt
grep -q 'meta_memory_mb: |107374182400|' output2.txt
grep -q 'meta_memory_gb: |104857600|' output2.txt
grep -q 'meta_memory_tb: |102400|' output2.txt
grep -q 'meta_memory_b: |100000000000000000|' output2.txt
grep -q 'meta_memory_kb: |100000000000000|' output2.txt
grep -q 'meta_memory_mb: |100000000000|' output2.txt
grep -q 'meta_memory_gb: |100000000|' output2.txt
grep -q 'meta_memory_tb: |100000|' output2.txt
grep -q 'meta_memory_pb: |100|' output2.txt
grep -q 'meta_memory_kib: |97656250000000|' output2.txt
grep -q 'meta_memory_mib: |95367431641|' output2.txt
grep -q 'meta_memory_gib: |93132258|' output2.txt
grep -q 'meta_memory_tib: |90950|' output2.txt
grep -q 'meta_memory_pib: |89|' output2.txt

grep -q 'head of input: |this file is only for testing|' output2.txt
grep -q 'head of resource1: |if you can read this,|' output2.txt


echo ">>> Checking whether output is correct with minimal parameters, but with 1024-base memory"
"$meta_executable" \
"resource2.txt" \
--real_number 123.456 \
--whole_number=789 \
-s "a \\ b \$ c \` d \" e ' f \n g # h @ i { j } k \"\"\" l ''' m todo_add_back_DOLLAR_VIASH_TEMP n : o ; p" \
---cpus 666 \
---memory 100PiB \
> output2.txt

grep -q 'meta_memory_b: |112589990684262400|' output2.txt
grep -q 'meta_memory_kb: |112589990684263|' output2.txt
grep -q 'meta_memory_mb: |112589990685|' output2.txt
grep -q 'meta_memory_gb: |112589991|' output2.txt
grep -q 'meta_memory_tb: |112590|' output2.txt
grep -q 'meta_memory_pb: |113|' output2.txt
grep -q 'meta_memory_kib: |109951162777600|' output2.txt
grep -q 'meta_memory_mib: |107374182400|' output2.txt
grep -q 'meta_memory_gib: |104857600|' output2.txt
grep -q 'meta_memory_tib: |102400|' output2.txt
grep -q 'meta_memory_pib: |100|' output2.txt

if [[ $meta_name == "bash" || $meta_name == "js" ]]; then
# This currently only works fully on bash and javascript

Expand Down
Loading

0 comments on commit ff7d939

Please sign in to comment.