From 8e904e06dd2fe3424aa9496afd90b328a45e59e3 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Mon, 15 Jul 2024 18:54:22 +0200 Subject: [PATCH] Graph Read Benchmarks (#257) * Implemented read benchmarks for ODB * Implemented read benchmarks for TinkerGraph * Fixed infinity ops issue * Process commit * Draft Neo4j implemented * Neo4j working & started benchmarking SBT task * Running benchmarking via SBT and Scala Scripts + updated readme * Added full dataset * Pushed other drivers to benchmarking set --- .gitignore | 2 + README.md | 6 + build.sbt | 40 ++- .../plume/oss/drivers/GremlinDriver.scala | 2 +- .../oss/drivers/Neo4jEmbeddedDriver.scala | 4 +- project/DownloadHelper.scala | 48 ++++ runBenchmarks.sc | 38 +++ .../com/github/plume/oss/Benchmark.scala | 178 +++++------- .../oss/benchmarking/GraphReadBenchmark.scala | 88 ++++++ .../benchmarking/GraphWriteBenchmark.scala | 46 ++++ .../Neo4jEmbedReadBenchmark.scala | 254 ++++++++++++++++++ .../OverflowDbReadBenchmark.scala | 122 +++++++++ .../TinkerGraphReadBenchmark.scala | 121 +++++++++ 13 files changed, 834 insertions(+), 115 deletions(-) create mode 100644 project/DownloadHelper.scala create mode 100644 runBenchmarks.sc create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala diff --git a/.gitignore b/.gitignore index 00c70673..65ffdbe6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ graph.xml gsql_client.* *.txt *.csv +/workspace +/results # Ignore Gradle GUI config gradle-app.setting diff --git a/README.md b/README.md index a07fa86b..d604f07e 100644 --- a/README.md +++ 
b/README.md @@ -143,6 +143,12 @@ database backends. While the binary explains the available functions, the execut Jmh/runMain com.github.plume.oss.Benchmark overflowdb testprogram -o output -r results --storage-location test.cpg ``` +An automated script to run the benchmarks versus programs from the `defects4j` dataset is available under +`runBenchmarks.sc`, which can be executed with: +```bash +scala runBenchmarks.sc +``` + ## Logging Plume uses [SLF4J](http://www.slf4j.org/) as the logging fascade. diff --git a/build.sbt b/build.sbt index 338822f9..ac462aba 100644 --- a/build.sbt +++ b/build.sbt @@ -45,8 +45,8 @@ libraryDependencies ++= Seq( "org.openjdk.jmh" % "jmh-generator-reflection" % Versions.jmh, "org.openjdk.jmh" % "jmh-generator-asm" % Versions.jmh, "org.slf4j" % "slf4j-api" % Versions.slf4j, - "org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test, - "org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test, + "org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test, "org.scalatest" %% "scalatest" % Versions.scalatest % Test ) @@ -64,3 +64,39 @@ developers := List( Global / onChangedBuildSource := ReloadOnSourceChanges publishMavenStyle := true + +// Benchmark Tasks + +lazy val datasetDir = taskKey[File]("Dataset directory") +datasetDir := baseDirectory.value / "workspace" / "defects4j" +lazy val driversToBenchmark = taskKey[Seq[String]]("Drivers to benchmark") +driversToBenchmark := Seq("overflowdb", "tinkergraph", "neo4j-embedded") + +lazy val defect4jDataset = taskKey[Seq[(String, String)]]("JARs for projects used in `defects4j`") +defect4jDataset := + Seq( + "Chart" -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar", + "Cli" -> "https://repo1.maven.org/maven2/commons-cli/commons-cli/1.8.0/commons-cli-1.8.0.jar", + "Closure" -> 
"https://repo1.maven.org/maven2/com/google/javascript/closure-compiler/v20240317/closure-compiler-v20240317.jar", + "Codec" -> "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar", + "Collections" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.4/commons-collections4-4.4.jar", + "Compress" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar", + "Csv" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-csv/1.11.0/commons-csv-1.11.0.jar", + "Gson" -> "https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar", + "JacksonCore" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.17.2/jackson-core-2.17.2.jar", + "JacksonDatabind" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.17.2/jackson-databind-2.17.2.jar", + "JacksonXml" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/2.17.2/jackson-dataformat-xml-2.17.2.jar", + "Jsoup" -> "https://repo1.maven.org/maven2/org/jsoup/jsoup/1.18.1/jsoup-1.18.1.jar", + "JxPath" -> "https://repo1.maven.org/maven2/commons-jxpath/commons-jxpath/1.3/commons-jxpath-1.3.jar", + "Lang" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar", + "Math" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar", + "Mockito" -> "https://repo1.maven.org/maven2/org/mockito/mockito-core/5.12.0/mockito-core-5.12.0.jar", + "Time" -> "https://repo1.maven.org/maven2/joda-time/joda-time/2.12.7/joda-time-2.12.7.jar" + ) + +lazy val benchmarkDownloadTask = taskKey[Unit](s"Download `defects4j` candidates for benchmarking") +benchmarkDownloadTask := { + defect4jDataset.value.foreach { case (name, url) => + DownloadHelper.ensureIsAvailable(url, datasetDir.value / s"$name.jar") + } +} diff --git 
a/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala b/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala index 3b7591b1..f7b373b4 100644 --- a/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala +++ b/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala @@ -41,7 +41,7 @@ abstract class GremlinDriver(txMax: Int = 50) extends IDriver { * @return * a Gremlin graph traversal source. */ - protected def g(): GraphTraversalSource = { + def g(): GraphTraversalSource = { traversalSource match { case Some(conn) => conn case None => diff --git a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala index 6dab0dc0..a58949d8 100644 --- a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala +++ b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala @@ -6,7 +6,7 @@ import com.github.plume.oss.util.BatchedUpdateUtil.* import io.shiftleft.codepropertygraph.generated.nodes.StoredNode import org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME import org.neo4j.dbms.api.{DatabaseManagementService, DatabaseManagementServiceBuilder} -import org.neo4j.graphdb.{Label, Transaction} +import org.neo4j.graphdb.{GraphDatabaseService, Label, Transaction} import org.slf4j.LoggerFactory import overflowdb.BatchedUpdate.{CreateEdge, DiffOrBuilder, SetNodeProperty} import overflowdb.{BatchedUpdate, DetachedNodeData} @@ -41,6 +41,8 @@ final class Neo4jEmbeddedDriver( }) } + def graph: GraphDatabaseService = graphDb + private def connect(): Unit = { managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build() graphDb = managementService.database(databaseName) diff --git a/project/DownloadHelper.scala b/project/DownloadHelper.scala new file 
mode 100644 index 00000000..da86faf0 --- /dev/null +++ b/project/DownloadHelper.scala @@ -0,0 +1,48 @@ +import java.io.File +import java.net.URI +import java.nio.file.{Files, Path, Paths} + +object DownloadHelper { + val LocalStorageDir = Paths.get(".local/source-urls") + + /** Downloads the remote file from the given url if either + * - the localFile is not available, + * - or the url is different from the previously downloaded file + * - or we don't have the original url from the previously downloaded file + * We store the information about the previously downloaded urls and the localFile in `.local` + */ + def ensureIsAvailable(url: String, localFile: File): Unit = { + if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) { + val localPath = localFile.toPath + Files.deleteIfExists(localPath) + + println(s"[INFO] downloading $url to $localFile") + sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream => + sbt.IO.transfer(inputStream, localFile) + } + + // persist url in local storage + val storageFile = storageInfoFileFor(localFile) + Files.createDirectories(storageFile.getParent) + Files.writeString(storageFile, url) + } + } + + private def relativePathToProjectRoot(path: Path): String = + Paths + .get("") + .toAbsolutePath + .normalize() + .relativize(path.toAbsolutePath) + .toString + + private def previousUrlForLocalFile(localFile: File): Option[String] = { + Option(storageInfoFileFor(localFile)) + .filter(Files.exists(_)) + .map(Files.readString) + .filter(_.nonEmpty) + } + + private def storageInfoFileFor(localFile: File): Path = + LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath)) +} diff --git a/runBenchmarks.sc b/runBenchmarks.sc new file mode 100644 index 00000000..49607ce8 --- /dev/null +++ b/runBenchmarks.sc @@ -0,0 +1,38 @@ +import scala.sys.process.* +import java.nio.file.Files +import java.nio.file.Path +import scala.jdk.CollectionConverters.* + +@main def main(): Unit = { + println("[info] Ensuring 
compilation status and benchmark dataset availability...") + "sbt compile benchmarkDownloadTask".! + + val datasetDir = Path.of("workspace", "defects4j") + val resultsDir = Path.of("results") + + if (!Files.exists(resultsDir)) Files.createDirectory(resultsDir) + + def benchmarkArgs(driver: String, project: String): String = { + val projectDir = Path.of(datasetDir.toString, project) + val projectName = project.toLowerCase.stripSuffix(".jar") + val resultsPath = Path.of(resultsDir.toString, s"results-$driver-$projectName") + val outputPath = Path.of(resultsDir.toString, s"output-$driver-$projectName") + s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath}" + } + + println("[info] Available projects:") + val projects = Files.list(datasetDir).filter(_.toString.endsWith(".jar")).toList.asScala.toList + projects.foreach(p => println(s" - ${p.getFileName.toString}")) + + println("[info] Available drivers:") + val drivers = Seq("overflowdb") + drivers.foreach(d => println(s" - $d")) + + drivers.foreach { driver => + projects.foreach { project => + val cmd = benchmarkArgs(driver, project.getFileName.toString) + println(s"[info] Benchmarking '$driver' on project '$project'") + s"sbt \"$cmd\"".! 
+ } + } +} diff --git a/src/main/scala/com/github/plume/oss/Benchmark.scala b/src/main/scala/com/github/plume/oss/Benchmark.scala index a2ff671d..8538bfdb 100644 --- a/src/main/scala/com/github/plume/oss/Benchmark.scala +++ b/src/main/scala/com/github/plume/oss/Benchmark.scala @@ -1,17 +1,22 @@ package com.github.plume.oss -import com.github.plume.oss.Benchmark.BenchmarkType.WRITE -import com.github.plume.oss.drivers.IDriver -import io.joern.jimple2cpg.Config +import better.files.File +import com.github.plume.oss.Benchmark.BenchmarkType.* +import com.github.plume.oss.benchmarking.{ + GraphWriteBenchmark, + Neo4jEmbedReadBenchmark, + OverflowDbReadBenchmark, + TinkerGraphReadBenchmark +} +import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver} import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler -import org.openjdk.jmh.annotations.{Benchmark, Level, Mode, Param, Scope, Setup, State, TearDown} -import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole} +import org.openjdk.jmh.annotations.Mode import org.openjdk.jmh.runner.Runner import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue} import upickle.default.* +import java.util import java.util.concurrent.TimeUnit -import scala.compiletime.uninitialized object Benchmark { @@ -28,13 +33,36 @@ object Benchmark { s"Finished WRITE JMH benchmarks. Results: ${config.jmhResultFile}-WRITE.csv; Output: ${config.jmhOutputFile}-WRITE.csv" ) -// val readOptsBenchmark = createOptionsBoilerPlate(config, READ) -// .include(classOf[OverflowDbBenchmark].getSimpleName) -// .build() -// new Runner(readOptsBenchmark).run() -// println( -// s"Finished READ JMH benchmarks. 
Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv" -// ) + val readOptsBenchmark = config.dbConfig match { + case _: TinkerGraphConfig => + Option( + createOptionsBoilerPlate(config, READ) + .include(classOf[TinkerGraphReadBenchmark].getSimpleName) + .build() + ) + case _: OverflowDbConfig => + Option( + createOptionsBoilerPlate(config, READ) + .include(classOf[OverflowDbReadBenchmark].getSimpleName) + .build() + ) + case _: Neo4jEmbeddedConfig => + Option( + createOptionsBoilerPlate(config, READ) + .include(classOf[Neo4jEmbedReadBenchmark].getSimpleName) + .build() + ) + case x => + println(s"Read benchmarks are not available for ${x.getClass.getSimpleName}, skipping...") + Option.empty + } + readOptsBenchmark.foreach { opts => + new Runner(opts).run() + println( + s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv" + ) + } + } } @@ -58,105 +86,33 @@ object Benchmark { case READ, WRITE } -} - -@State(Scope.Benchmark) -class GraphWriteBenchmark { - - @Param(Array("")) - var configStr: String = "" - var config: PlumeConfig = - if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig() - var driver: IDriver = uninitialized - - @Setup - def setupBenchmark(params: BenchmarkParams): Unit = { - config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig() - driver = config.dbConfig.toDriver - } - - @Setup(Level.Iteration) - def clearDriver(params: BenchmarkParams): Unit = { - driver.clear() - } - - @Benchmark - def createAst(blackhole: Blackhole): Unit = { - JimpleAst2Database(driver).createAst(Config().withInputPath(config.inputDir)) - Option(blackhole).foreach(_.consume(driver)) - } - - @TearDown - def cleanupBenchmark(): Unit = { - driver.clear() - driver.close() - } - -} - -sealed trait GraphReadBenchmark[D <: IDriver](protected val driver: D) { - - private var nodeStart: Array[Long] = new Array[Long](0) - private var fullNames: 
Array[String] = uninitialized + def initializeDriverAndInputDir(configStr: String, useCachedGraph: Boolean): (IDriver, PlumeConfig) = { + val config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig() + if (!useCachedGraph) { + config.dbConfig match { + case OverflowDbConfig(storageLocation, _, _) if !useCachedGraph => + File(storageLocation).delete(swallowIOExceptions = true) + case TinkerGraphConfig(Some(importPath), _) if !useCachedGraph => + File(importPath).delete(swallowIOExceptions = true) + case Neo4jEmbeddedConfig(_, databaseDir, _) if !useCachedGraph => + File(databaseDir).delete(swallowIOExceptions = true) + case _ => + } + } + + val driver = if (useCachedGraph) { + config.dbConfig match { + case TinkerGraphConfig(Some(importPath), _) if File(importPath).exists => + val driver = config.dbConfig.toDriver.asInstanceOf[TinkerGraphDriver] + driver.importGraph(importPath) + driver + case _ => config.dbConfig.toDriver + } + } else { + config.dbConfig.toDriver + } - @Setup - def setupFun(params: BenchmarkParams): Unit = { - params.getBenchmark + driver -> config } - @Benchmark - def astDFS(blackhole: Blackhole): Int - - @Benchmark - def astUp(blackhole: Blackhole): Int - - @Benchmark - def orderSumChecked(blackhole: Blackhole): Int - - @Benchmark - def orderSumUnchecked(blackhole: Blackhole): Int - - @Benchmark - def orderSumExplicit(blackhole: Blackhole): Int - - @Benchmark - def callOrderTrav(blackhole: Blackhole): Int - - @Benchmark - def callOrderExplicit(blackhole: Blackhole): Int - - @Benchmark - def indexedMethodFullName(bh: Blackhole): Unit - - @Benchmark - def unindexedMethodFullName(bh: Blackhole): Unit - } - -//@State(Scope.Benchmark) -//class OverflowDbBenchmark(config: OverflowDbConfig) -// extends GraphReadBenchmark( -// ) { -// -// override def createAst(blackhole: Blackhole): Int = { -// 0 -// } -// -// override def astDFS(blackhole: Blackhole): Int = ??? -// -// override def astUp(blackhole: Blackhole): Int = ??? 
-// -// override def orderSumChecked(blackhole: Blackhole): Int = ??? -// -// override def orderSumUnchecked(blackhole: Blackhole): Int = ??? -// -// override def orderSumExplicit(blackhole: Blackhole): Int = ??? -// -// override def callOrderTrav(blackhole: Blackhole): Int = ??? -// -// override def callOrderExplicit(blackhole: Blackhole): Int = ??? -// -// override def indexedMethodFullName(bh: Blackhole): Unit = ??? -// -// override def unindexedMethodFullName(bh: Blackhole): Unit = ??? -//} diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala new file mode 100644 index 00000000..559c74cc --- /dev/null +++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala @@ -0,0 +1,88 @@ +package com.github.plume.oss.benchmarking + +import com.github.plume.oss +import com.github.plume.oss.{Benchmark, JimpleAst2Database, PlumeConfig, TinkerGraphConfig} +import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver} +import io.joern.jimple2cpg.Config +import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout} +import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole} + +import java.util.concurrent.TimeUnit +import scala.compiletime.uninitialized + +@State(Scope.Benchmark) +@Timeout(2, TimeUnit.MINUTES) +trait GraphReadBenchmark { + + @Param(Array("")) + protected var configStr: String = "" + protected var config: PlumeConfig = uninitialized + protected var nodeStart: Array[Long] = new Array[Long](0) + protected var fullNames: Array[String] = uninitialized + protected var driver: IDriver = uninitialized + + protected def setupBenchmarkParams(params: BenchmarkParams): Unit = { + params.getBenchmark match { + case name if name.endsWith("astDFS") => + nodeStart = setupAstDfs() + case name if name.endsWith("astUp") => + nodeStart = setupAstUp() + case name if name.contains("orderSum") => + nodeStart = 
setUpOrderSum() + case name if name.contains("callOrder") => + nodeStart = setUpCallOrder() + case name if name.contains("MethodFullName") => + fullNames = setUpMethodFullName() + } + } + + protected def setupBenchmark(params: BenchmarkParams): Unit = { + val (driver_, config_) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = true) + driver = driver_ + config = config_ + if (!driver.exists(1L)) { + JimpleAst2Database(driver).createAst(Config().withInputPath(config_.inputDir)) + config.dbConfig match { + case TinkerGraphConfig(_, Some(exportPath)) => driver.asInstanceOf[TinkerGraphDriver].exportGraph(exportPath) + case _ => + } + } + } + + protected def setupAstDfs(): Array[Long] + + protected def setupAstUp(): Array[Long] + + protected def setUpOrderSum(): Array[Long] + + protected def setUpCallOrder(): Array[Long] + + protected def setUpMethodFullName(): Array[String] + + @Benchmark + def astDFS(blackhole: Blackhole): Int + + @Benchmark + def astUp(blackhole: Blackhole): Int + + @Benchmark + def orderSum(blackhole: Blackhole): Int + + @Benchmark + def callOrderTrav(blackhole: Blackhole): Int + + @Benchmark + def callOrderExplicit(blackhole: Blackhole): Int + + @Benchmark + def indexedMethodFullName(bh: Blackhole): Unit + + @Benchmark + def unindexedMethodFullName(bh: Blackhole): Unit + + @TearDown + def cleanupBenchmark(): Unit = { + driver.close() + } + +} diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala new file mode 100644 index 00000000..a887faea --- /dev/null +++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala @@ -0,0 +1,46 @@ +package com.github.plume.oss.benchmarking + +import com.github.plume.oss +import com.github.plume.oss.{Benchmark, JimpleAst2Database} +import com.github.plume.oss.drivers.IDriver +import io.joern.jimple2cpg.Config +import org.openjdk.jmh.annotations.{Benchmark, Level, 
Param, Scope, Setup, State, TearDown, Timeout}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+
+import java.util.concurrent.TimeUnit
+import scala.compiletime.uninitialized
+
+@State(Scope.Benchmark)
+@Timeout(5, TimeUnit.MINUTES)
+class GraphWriteBenchmark {
+
+  @Param(Array(""))
+  var configStr: String = ""
+  private var driver: IDriver = uninitialized
+  private var inputDir: String = uninitialized
+
+  @Setup
+  def setupBenchmark(params: BenchmarkParams): Unit = {
+    val (driver_, config) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = false)
+    driver = driver_
+    inputDir = config.inputDir
+  }
+
+  @Setup(Level.Iteration)
+  def clearDriver(params: BenchmarkParams): Unit = {
+    driver.clear()
+  }
+
+  @Benchmark
+  def createAst(blackhole: Blackhole): Unit = {
+    JimpleAst2Database(driver).createAst(Config().withInputPath(inputDir))
+    Option(blackhole).foreach(_.consume(driver))
+  }
+
+  @TearDown
+  def cleanupBenchmark(): Unit = {
+    driver.clear()
+    driver.close()
+  }
+
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
new file mode 100644
index 00000000..985f9cc9
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
@@ -0,0 +1,306 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss.drivers.Neo4jEmbeddedDriver
+import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
+import io.shiftleft.codepropertygraph.generated.NodeTypes.{CALL, METHOD}
+import io.shiftleft.codepropertygraph.generated.PropertyNames.{FULL_NAME, ORDER}
+import org.neo4j.graphdb.GraphDatabaseService
+import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+import overflowdb.traversal.*
+
+import java.util
+import scala.compiletime.uninitialized
+import scala.jdk.CollectionConverters.*
+import scala.util.{Random, Using}
+
+/** JMH read benchmarks for the embedded Neo4j driver, written as Cypher queries.
+  *
+  * Every query runs in a transaction of its own (`Using.resource(g.beginTx)`) and its rows are read before that
+  * transaction closes. Nodes are addressed through their `id` property.
+  */
+@State(Scope.Benchmark)
+class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
+
+  private var g: GraphDatabaseService = uninitialized
+
+  /** Wraps one key/value pair in the parameter map expected by `Transaction.execute`. */
+  private def param(key: String, value: Object): util.Map[String, Object] = {
+    val single = new util.HashMap[String, Object](1)
+    single.put(key, value)
+    single
+  }
+
+  /** Reads an integral result column as `Int`.
+    *
+    * Cypher's `COUNT` comes back as a `java.lang.Long`, so casting the column straight to `Int` throws a
+    * `ClassCastException`. Going through `Number` accepts whichever boxed width the database hands back.
+    */
+  private def intColumn(row: util.Map[String, Object], column: String): Int =
+    row.get(column).asInstanceOf[Number].intValue
+
+  /** Initialises the driver through the parent, keeps a handle on the Neo4j service, then prepares the inputs of the
+    * benchmark named in `params`.
+    */
+  @Setup
+  override def setupBenchmark(params: BenchmarkParams): Unit = {
+    super.setupBenchmark(params)
+    g = driver.asInstanceOf[Neo4jEmbeddedDriver].graph
+    setupBenchmarkParams(params)
+  }
+
+  /** IDs of AST roots: nodes with an outgoing `AST` edge and no incoming one.
+    *
+    * The relationship type needs the leading colon (`[:AST]`); a bare `[AST]` only names a relationship variable and
+    * matches edges of every type. `DISTINCT` keeps a root with several children from being listed once per edge.
+    */
+  override def setupAstDfs(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+                    |MATCH (n)-[:$AST]->()
+                    |WHERE NOT (n)<-[:$AST]-()
+                    |RETURN DISTINCT n.id AS ID
+                    |""".stripMargin)
+        .map { result => result.get("ID").asInstanceOf[Long] }
+        .toArray
+    }
+  }
+
+  /** IDs of every node that has at least one outgoing `AST` edge, each listed once.
+    *
+    * NOTE(review): the OverflowDB and TinkerGraph variants start `astUp` from all nodes; confirm whether the narrower
+    * start set here is intended.
+    */
+  override def setupAstUp(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+                    |MATCH (n)-[:$AST]->()
+                    |RETURN DISTINCT n.id AS ID
+                    |""".stripMargin)
+        .map { result => result.get("ID").asInstanceOf[Long] }
+        .toArray
+    }
+  }
+
+  /** IDs of all nodes that carry an `ORDER` property. */
+  override def setUpOrderSum(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+                    |MATCH (n)
+                    |WHERE n.$ORDER IS NOT NULL
+                    |RETURN n.id AS ID
+                    |""".stripMargin)
+        .map { result => result.get("ID").asInstanceOf[Long] }
+        .toArray
+    }
+  }
+
+  /** IDs of all `CALL` nodes that carry an `ORDER` property. */
+  override def setUpCallOrder(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+                    |MATCH (n: $CALL)
+                    |WHERE n.$ORDER IS NOT NULL
+                    |RETURN n.id AS ID
+                    |""".stripMargin)
+        .map { result => result.get("ID").asInstanceOf[Long] }
+        .toArray
+    }
+  }
+
+  /** Up to 1000 full names of `METHOD` nodes, shuffled with the fixed seed 1234. */
+  override def setUpMethodFullName(): Array[String] = {
+    val fullNames_ = Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+                    |MATCH (n: $METHOD)
+                    |WHERE n.$FULL_NAME IS NOT NULL
+                    |RETURN n.$FULL_NAME as $FULL_NAME
+                    |""".stripMargin)
+        .map { result => result.get(FULL_NAME).asInstanceOf[String] }
+        .toArray
+    }
+    fullNames = new Random(1234).shuffle(fullNames_).toArray
+    fullNames.slice(0, math.min(1000, fullNames.length))
+  }
+
+  /** Depth-first walk over `AST` edges from every start node, issuing one Cypher query per visited node.
+    *
+    * @return
+    *   the number of start nodes plus the number of nodes popped from the stack.
+    */
+  @Benchmark
+  override def astDFS(blackhole: Blackhole): Int = {
+    val stack = scala.collection.mutable.ArrayDeque.empty[Long]
+    stack.addAll(nodeStart)
+    var nnodes = nodeStart.length
+    while (stack.nonEmpty) {
+      val nodeId = stack.removeLast()
+      val childrenIds = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+             |MATCH (n)-[:$AST]->(m)
+             |WHERE n.id = $$nodeId
+             |RETURN m.id AS ID
+             |""".stripMargin,
+          param("nodeId", Long.box(nodeId))
+        ).map { result => result.get("ID").asInstanceOf[Long] }
+          .toArray
+      }
+      stack.appendAll(childrenIds)
+      nnodes += 1
+    }
+    Option(blackhole).foreach(_.consume(nnodes))
+    nnodes
+  }
+
+  /** For every start node, follows incoming `AST` edges upwards until a node without a parent is reached.
+    *
+    * @return
+    *   the total number of upward steps taken over all start nodes.
+    */
+  @Benchmark
+  override def astUp(blackhole: Blackhole): Int = {
+    var sumDepth = 0
+    for (node <- nodeStart) {
+      var nodeId = node
+      // Re-evaluated on every call, so it always asks for the parents of the current `nodeId`.
+      def getResult = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+             |MATCH (n)<-[:$AST]-(m)
+             |WHERE n.id = $$nodeId
+             |RETURN m.id AS ID
+             |""".stripMargin,
+          param("nodeId", Long.box(nodeId))
+        ).map { result => result.get("ID").asInstanceOf[Long] }
+          .toArray
+      }
+      var result = getResult
+      while (result.nonEmpty) {
+        sumDepth += 1
+        nodeId = result.head
+        result = getResult
+      }
+    }
+    Option(blackhole).foreach(_.consume(sumDepth))
+    sumDepth
+  }
+
+  /** Sums the `ORDER` property of every start node, one query per node.
+    *
+    * @return
+    *   the accumulated sum.
+    */
+  @Benchmark
+  override def orderSum(blackhole: Blackhole): Int = {
+    var sumOrder = 0
+    for (nodeId <- nodeStart) {
+      val orderArr = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+             |MATCH (n)
+             |WHERE n.id = $$nodeId
+             |RETURN n.$ORDER AS $ORDER
+             |""".stripMargin,
+          param("nodeId", Long.box(nodeId))
+        ).map { result => intColumn(result, ORDER) }
+          .toArray
+      }
+      // A start ID without a matching row contributes nothing instead of failing on `.head`.
+      sumOrder += orderArr.headOption.getOrElse(0)
+    }
+    Option(blackhole).foreach(_.consume(sumOrder))
+    sumOrder
+  }
+
+  /** Counts, inside the database, the `CALL` start nodes whose `ORDER` is greater than 2.
+    *
+    * @return
+    *   the count reported by the query.
+    */
+  @Benchmark
+  override def callOrderTrav(blackhole: Blackhole): Int = {
+    val res = Using.resource(g.beginTx) { tx =>
+      tx.execute(
+        s"""
+           |MATCH (n: $CALL)
+           |WHERE n.$ORDER > 2 AND n.id IN $$nodeIds
+           |RETURN COUNT(n) AS SIZE
+           |""".stripMargin,
+        param("nodeIds", nodeStart.toList.asJava)
+      ).map { result => intColumn(result, "SIZE") }
+        .next()
+    }
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Fetches the `ORDER` of every `CALL` start node and counts the values greater than 2 on the client side.
+    *
+    * @return
+    *   the number of fetched `ORDER` values greater than 2.
+    */
+  @Benchmark
+  override def callOrderExplicit(blackhole: Blackhole): Int = {
+    var res = 0
+    val nodes = Using.resource(g.beginTx) { tx =>
+      tx.execute(
+        s"""
+           |MATCH (n: $CALL)
+           |WHERE n.id IN $$nodeIds
+           |RETURN n.$ORDER as $ORDER
+           |""".stripMargin,
+        param("nodeIds", nodeStart.toList.asJava)
+      ).map { result => intColumn(result, ORDER) }
+        .toArray
+    }
+    for (order <- nodes) {
+      if (order > 2) res += 1
+    }
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Looks up `METHOD` nodes by full name with the label given in the `MATCH` pattern and consumes every hit. */
+  @Benchmark
+  override def indexedMethodFullName(bh: Blackhole): Unit = {
+    fullNames.foreach { fullName =>
+      Using
+        .resource(g.beginTx) { tx =>
+          tx.execute(
+            s"""
+               |MATCH (n: $METHOD)
+               |WHERE n.$FULL_NAME = $$fullName
+               |RETURN n AS NODE
+               |""".stripMargin,
+            param("fullName", fullName)
+          ).map(_.get("NODE"))
+            .toArray
+        }
+        .foreach(bh.consume)
+    }
+  }
+
+  /** Same lookup as [[indexedMethodFullName]], but the label is tested through `labels(n)` in the `WHERE` clause.
+    *
+    * The label is passed as a quoted string literal; unquoted, Cypher would read it as a variable named `METHOD`.
+    */
+  @Benchmark
+  override def unindexedMethodFullName(bh: Blackhole): Unit = {
+    fullNames.foreach { fullName =>
+      Using
+        .resource(g.beginTx) { tx =>
+          tx.execute(
+            s"""
+               |MATCH (n)
+               |WHERE n.$FULL_NAME = $$fullName and '$METHOD' IN labels(n)
+               |RETURN n AS NODE
+               |""".stripMargin,
+            param("fullName", fullName)
+          ).map(_.get("NODE"))
+            .toArray
+        }
+        .foreach(bh.consume)
+    }
+  }
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala
new file mode 100644
index 00000000..e65c62ff
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala
@@ -0,0 
+1,161 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss.benchmarking.GraphReadBenchmark
+import com.github.plume.oss.drivers.OverflowDbDriver
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
+import io.shiftleft.codepropertygraph.generated.PropertyNames.ORDER
+import io.shiftleft.codepropertygraph.generated.nodes.{Call, StoredNode}
+import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+import overflowdb.PropertyKey
+import overflowdb.traversal.*
+import scala.compiletime.uninitialized
+import scala.util.Random
+import io.shiftleft.semanticcpg.language.*
+
+/** JMH read benchmarks that run directly on the `Cpg` exposed by the OverflowDB driver. */
+@State(Scope.Benchmark)
+class OverflowDbReadBenchmark extends GraphReadBenchmark {
+
+  private var cpg: Cpg = uninitialized
+
+  /** Lets the parent initialise the driver, takes the driver's `Cpg`, then prepares the inputs of the benchmark named
+    * in `params`.
+    */
+  @Setup
+  override def setupBenchmark(params: BenchmarkParams): Unit = {
+    super.setupBenchmark(params)
+    val odbDriver = driver.asInstanceOf[OverflowDbDriver]
+    cpg = odbDriver.cpg
+    setupBenchmarkParams(params)
+  }
+
+  /** IDs of nodes with at least one outgoing `AST` edge and no incoming one. */
+  override def setupAstDfs(): Array[Long] =
+    cpg.graph.nodes.iterator
+      .filter(candidate => candidate.in(AST).isEmpty && candidate.out(AST).nonEmpty)
+      .map(_.id())
+      .toArray
+
+  /** IDs of all nodes in the graph. */
+  override def setupAstUp(): Array[Long] = {
+    val allNodes = cpg.graph.nodes.iterator
+    allNodes.map(_.id()).toArray
+  }
+
+  /** IDs of all nodes whose property map contains `ORDER`. */
+  override def setUpOrderSum(): Array[Long] =
+    cpg.graph.nodes.iterator.collect {
+      case candidate if candidate.propertiesMap().containsKey(ORDER) => candidate.id()
+    }.toArray
+
+  /** IDs of all `Call` nodes. */
+  override def setUpCallOrder(): Array[Long] = {
+    val callIds = cpg.graph.nodes.iterator.collect { case call: Call => call.id() }
+    callIds.toArray
+  }
+
+  /** Up to 1000 method full names, shuffled with the fixed seed 1234. */
+  override def setUpMethodFullName(): Array[String] = {
+    val shuffled: Array[String] = new Random(1234).shuffle(cpg.method.fullName.iterator).toArray
+    fullNames = shuffled
+    shuffled.slice(0, math.min(1000, shuffled.length))
+  }
+
+  /** Depth-first walk over outgoing AST edges from every start node.
+    *
+    * @return
+    *   the number of start nodes plus the number of nodes popped from the stack.
+    */
+  @Benchmark
+  override def astDFS(blackhole: Blackhole): Int = {
+    val pending = scala.collection.mutable.ArrayDeque.empty[Long]
+    pending.addAll(nodeStart)
+    var visited = nodeStart.length
+    while (pending.nonEmpty) {
+      val current = cpg.graph.node(pending.removeLast()).asInstanceOf[StoredNode]
+      pending.appendAll(current._astOut.map(_.id))
+      visited += 1
+    }
+    Option(blackhole).foreach(_.consume(visited))
+    visited
+  }
+
+  /** For every start node, climbs incoming AST edges until no parent is left.
+    *
+    * @return
+    *   the total number of nodes visited while climbing, start nodes included.
+    */
+  @Benchmark
+  override def astUp(blackhole: Blackhole): Int = {
+    var visited = 0
+    nodeStart.foreach { startId =>
+      var current = cpg.graph.node(startId)
+      while (current != null) {
+        visited += 1
+        current = current.asInstanceOf[StoredNode]._astIn.nextOption.orNull
+      }
+    }
+    Option(blackhole).foreach(_.consume(visited))
+    visited
+  }
+
+  /** Sums the `ORDER` property over all start nodes.
+    *
+    * @return
+    *   the accumulated sum.
+    */
+  @Benchmark
+  override def orderSum(blackhole: Blackhole): Int = {
+    val orderKey = PropertyKey[Int](ORDER)
+    var total = 0
+    nodeStart.map(cpg.graph.node).foreach { node =>
+      total += node.asInstanceOf[StoredNode].property(orderKey)
+    }
+    Option(blackhole).foreach(_.consume(total))
+    total
+  }
+
+  /** Counts the start nodes whose `order` is greater than 2 using the `orderGt` traversal step.
+    *
+    * @return
+    *   the number of matching calls.
+    */
+  @Benchmark
+  override def callOrderTrav(blackhole: Blackhole): Int = {
+    val calls = cpg.graph.nodes(nodeStart*).iterator.asInstanceOf[Iterator[Call]]
+    val matching = calls.orderGt(2).size
+    Option(blackhole).foreach(_.consume(matching))
+    matching
+  }
+
+  /** Counts the start nodes whose `order` is greater than 2 by testing each call in Scala.
+    *
+    * @return
+    *   the number of matching calls.
+    */
+  @Benchmark
+  override def callOrderExplicit(blackhole: Blackhole): Int = {
+    val calls = cpg.graph.nodes(nodeStart*).iterator.asInstanceOf[Iterator[Call]]
+    val matching = calls.count(_.order > 2)
+    Option(blackhole).foreach(_.consume(matching))
+    matching
+  }
+
+  /** Looks up every prepared full name with `fullNameExact` on `cpg.method` and consumes each hit. */
+  @Benchmark
+  override def indexedMethodFullName(bh: Blackhole): Unit = {
+    for (fullName <- fullNames) {
+      cpg.method.fullNameExact(fullName).foreach(bh.consume)
+    }
+  }
+
+  /** Same lookup as [[indexedMethodFullName]], but behind a pass-through `filter` step. */
+  @Benchmark
+  override def unindexedMethodFullName(bh: Blackhole): Unit = {
+    fullNames.foreach { str =>
+      cpg.method.filter { _ => true }.fullNameExact(str).foreach(found => bh.consume(found))
+    }
+  }
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala
new file mode 100644
index 00000000..999c2516
--- /dev/null
+++ 
// Patch residue preserved from the original text (target path and hunk header):
// b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala @@ -0,0 +1,121 @@
package com.github.plume.oss.benchmarking

import com.github.plume.oss.benchmarking.GraphReadBenchmark
import com.github.plume.oss.drivers.TinkerGraphDriver
import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
import io.shiftleft.codepropertygraph.generated.NodeTypes.{CALL, METHOD}
import io.shiftleft.codepropertygraph.generated.PropertyNames.{FULL_NAME, ORDER}
import org.apache.tinkerpop.gremlin.process.traversal.P
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.{GraphTraversalSource, __}
import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}

import scala.jdk.CollectionConverters.*
import scala.compiletime.uninitialized
import scala.util.Random
import overflowdb.traversal.*

/** JMH read benchmarks that query the graph through Gremlin traversals obtained from a
  * [[TinkerGraphDriver]].
  *
  * `driver`, `nodeStart`, `fullNames` and `setupBenchmarkParams` are not defined in this class;
  * presumably they are inherited from [[GraphReadBenchmark]] (confirm against that file).
  */
@State(Scope.Benchmark)
class TinkerGraphReadBenchmark extends GraphReadBenchmark {

  /** Supplier of a traversal source; every call delegates to the driver's `g()`. */
  private var g: () => GraphTraversalSource = uninitialized

  /** Runs the parent setup, wires up the traversal-source supplier, then prepares the
    * per-benchmark inputs.
    *
    * The statement order matters: `g` must be set before `setupBenchmarkParams` is invoked, since
    * the `setup*` methods below use it.
    */
  @Setup
  override def setupBenchmark(params: BenchmarkParams): Unit = {
    super.setupBenchmark(params)
    g = () => driver.asInstanceOf[TinkerGraphDriver].g()
    setupBenchmarkParams(params)
  }

  /** IDs of all vertices that have at least one outgoing AST edge and no incoming AST edge. */
  override def setupAstDfs(): Array[Long] = {
    g().V().where(__.and(__.not(__.inE(AST)), __.outE(AST))).id().asScala.map(_.asInstanceOf[Long]).toArray
  }

  /** IDs of every vertex in the graph. */
  override def setupAstUp(): Array[Long] = {
    g().V().id().asScala.map(_.asInstanceOf[Long]).toArray
  }

  /** IDs of all vertices that carry an `ORDER` property. */
  override def setUpOrderSum(): Array[Long] = {
    g().V().has(ORDER).id().asScala.map(_.asInstanceOf[Long]).toArray
  }

  /** IDs of all vertices labelled `CALL`. */
  override def setUpCallOrder(): Array[Long] = {
    g().V().hasLabel(CALL).id().asScala.map(_.asInstanceOf[Long]).toArray
  }

  /** Shuffles the `FULL_NAME` values of all `METHOD` vertices with a fixed seed (1234).
    *
    * Side effect: the complete shuffled array is stored in `fullNames`.
    *
    * @return
    *   at most the first 1000 entries of the shuffled array
    */
  override def setUpMethodFullName(): Array[String] = {
    fullNames = new Random(1234).shuffle(g().V().hasLabel(METHOD).properties(FULL_NAME).value()).toArray
    fullNames.slice(0, math.min(1000, fullNames.length))
  }

  /** Walks the AST downwards from every ID in `nodeStart` using an explicit stack, issuing one
    * traversal per popped vertex.
    *
    * @return
    *   `nodeStart.length` plus the number of vertices popped from the stack (the start vertices are
    *   themselves popped, so they contribute to both terms)
    */
  @Benchmark
  override def astDFS(blackhole: Blackhole): Int = {
    val stack = scala.collection.mutable.ArrayDeque.empty[Long]
    stack.addAll(nodeStart)
    var nnodes = nodeStart.length
    while (stack.nonEmpty) {
      val nx = g().V(stack.removeLast())
      stack.appendAll(nx.out(AST).id().map(_.asInstanceOf[Long]).asScala.toArray)
      nnodes += 1
    }
    // The blackhole may be absent (null); only consume when one was supplied.
    Option(blackhole).foreach(_.consume(nnodes))
    nnodes
  }

  /** For every ID in `nodeStart`, follows incoming AST edges until a vertex without one is reached.
    *
    * Each hop issues two traversals: one for the existence check and one to fetch the parent's ID.
    *
    * @return
    *   the total number of hops over all walks (the start vertex itself is not counted)
    */
  @Benchmark
  override def astUp(blackhole: Blackhole): Int = {
    var sumDepth = 0
    for (node <- nodeStart) {
      var nodeId = node
      // Re-evaluated on every loop test, so it always checks the current `nodeId`.
      def hasNext = g().V(nodeId).in(AST).hasNext
      while (hasNext) {
        sumDepth += 1
        nodeId = g().V(nodeId).in(AST).id().next().asInstanceOf[Long]
      }
    }
    Option(blackhole).foreach(_.consume(sumDepth))
    sumDepth
  }

  /** Sums the `ORDER` property over all vertices referenced by `nodeStart`, one traversal each. */
  @Benchmark
  override def orderSum(blackhole: Blackhole): Int = {
    var sumOrder = 0
    for (node <- nodeStart.map(g().V(_))) {
      sumOrder += node.properties(ORDER).value().next().asInstanceOf[Int]
    }
    Option(blackhole).foreach(_.consume(sumOrder))
    sumOrder
  }

  /** Counts the `CALL` vertices in `nodeStart` with `ORDER > 2` using a Gremlin `has` predicate. */
  @Benchmark
  override def callOrderTrav(blackhole: Blackhole): Int = {
    val res = g().V(nodeStart*).hasLabel(CALL).has(ORDER, P.gt(2)).size
    Option(blackhole).foreach(_.consume(res))
    res
  }

  /** Counts the `CALL` vertices in `nodeStart` with `ORDER > 2` by reading the property from each
    * vertex and comparing it in Scala.
    *
    * Vertices without an `ORDER` property are skipped, which matches the `has(ORDER, P.gt(2))`
    * filter used by [[callOrderTrav]].
    */
  @Benchmark
  override def callOrderExplicit(blackhole: Blackhole): Int = {
    var res = 0
    for (node <- g().V(nodeStart*).hasLabel(CALL)) {
      // `property(key)` returns the VertexProperty wrapper, not the stored value; casting the
      // wrapper itself to Int fails at runtime, so unwrap it with `value()` instead.
      val order = node.property[Int](ORDER)
      if (order.isPresent && order.value() > 2) res += 1
    }
    Option(blackhole).foreach(_.consume(res))
    res
  }

  /** Looks up each entry of `fullNames` with `has(FULL_NAME, ...)` applied directly to the
    * `METHOD` vertices and consumes every hit.
    */
  @Benchmark
  override def indexedMethodFullName(bh: Blackhole): Unit = {
    fullNames.foreach { fullName =>
      g().V().hasLabel(METHOD).has(FULL_NAME, fullName).foreach(bh.consume)
    }
  }

  /** Same lookup as [[indexedMethodFullName]], but with the `has` test wrapped in a `where` step.
    *
    * NOTE(review): wrapping the test in `where` presumably prevents an index-backed lookup, hence
    * the name — confirm against the TinkerGraph traversal strategies.
    */
  @Benchmark
  override def unindexedMethodFullName(bh: Blackhole): Unit = {
    for {
      str   <- fullNames
      found <- g().V().hasLabel(METHOD).where(__.has(FULL_NAME, str))
    } bh.consume(found)
  }
}