From 8e904e06dd2fe3424aa9496afd90b328a45e59e3 Mon Sep 17 00:00:00 2001
From: David Baker Effendi <dbe@sun.ac.za>
Date: Mon, 15 Jul 2024 18:54:22 +0200
Subject: [PATCH] Graph Read Benchmarks (#257)

* Implemented read benchmarks for ODB

* Implemented read benchmarks for TinkerGraph

* Fixed infinity ops issue

* Process commit

* Draft Neo4j implemented

* Neo4j working & started benchmarking SBT task

* Running benchmarking via SBT and Scala Scripts + updated readme

* Added full dataset

* Pushed other drivers to benchmarking set
---
 .gitignore                                    |   2 +
 README.md                                     |   6 +
 build.sbt                                     |  40 ++-
 .../plume/oss/drivers/GremlinDriver.scala     |   2 +-
 .../oss/drivers/Neo4jEmbeddedDriver.scala     |   4 +-
 project/DownloadHelper.scala                  |  48 ++++
 runBenchmarks.sc                              |  38 +++
 .../com/github/plume/oss/Benchmark.scala      | 178 +++++-------
 .../oss/benchmarking/GraphReadBenchmark.scala |  88 ++++++
 .../benchmarking/GraphWriteBenchmark.scala    |  46 ++++
 .../Neo4jEmbedReadBenchmark.scala             | 254 ++++++++++++++++++
 .../OverflowDbReadBenchmark.scala             | 122 +++++++++
 .../TinkerGraphReadBenchmark.scala            | 121 +++++++++
 13 files changed, 834 insertions(+), 115 deletions(-)
 create mode 100644 project/DownloadHelper.scala
 create mode 100644 runBenchmarks.sc
 create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala
 create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala
 create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
 create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala
 create mode 100644 src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala

diff --git a/.gitignore b/.gitignore
index 00c70673..65ffdbe6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ graph.xml
 gsql_client.*
 *.txt
 *.csv
+/workspace
+/results
 
 # Ignore Gradle GUI config
 gradle-app.setting
diff --git a/README.md b/README.md
index a07fa86b..d604f07e 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,12 @@ database backends. While the binary explains the available functions, the execut
 Jmh/runMain com.github.plume.oss.Benchmark overflowdb testprogram -o output -r results --storage-location test.cpg
 ```
 
+An automated script that runs the benchmarks against programs from the `defects4j` dataset is available in
+`runBenchmarks.sc`, which can be executed with:
+```bash
+scala runBenchmarks.sc
+```
+
 ## Logging
 
 Plume uses [SLF4J](http://www.slf4j.org/) as the logging fascade.
diff --git a/build.sbt b/build.sbt
index 338822f9..ac462aba 100644
--- a/build.sbt
+++ b/build.sbt
@@ -45,8 +45,8 @@ libraryDependencies ++= Seq(
   "org.openjdk.jmh"          % "jmh-generator-reflection" % Versions.jmh,
   "org.openjdk.jmh"          % "jmh-generator-asm"        % Versions.jmh,
   "org.slf4j"                % "slf4j-api"                % Versions.slf4j,
-  "org.apache.logging.log4j" % "log4j-core"               % Versions.log4j % Test,
-  "org.apache.logging.log4j" % "log4j-slf4j-impl"         % Versions.log4j % Test,
+  "org.apache.logging.log4j" % "log4j-core"               % Versions.log4j     % Test,
+  "org.apache.logging.log4j" % "log4j-slf4j-impl"         % Versions.log4j     % Test,
   "org.scalatest"           %% "scalatest"                % Versions.scalatest % Test
 )
 
@@ -64,3 +64,39 @@ developers := List(
 Global / onChangedBuildSource := ReloadOnSourceChanges
 
 publishMavenStyle := true
+
+// Benchmark tasks: dataset location, drivers to benchmark, and download of the `defects4j` JARs
+
+lazy val datasetDir = taskKey[File]("Dataset directory")
+datasetDir := baseDirectory.value / "workspace" / "defects4j" // the same relative path runBenchmarks.sc lists JARs from
+lazy val driversToBenchmark = taskKey[Seq[String]]("Drivers to benchmark")
+driversToBenchmark := Seq("overflowdb", "tinkergraph", "neo4j-embedded") // NOTE(review): runBenchmarks.sc keeps its own driver list
+
+lazy val defect4jDataset = taskKey[Seq[(String, String)]]("JARs for projects used in `defects4j`")
+defect4jDataset :=
+  Seq(
+    "Chart" -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar",
+    "Cli"   -> "https://repo1.maven.org/maven2/commons-cli/commons-cli/1.8.0/commons-cli-1.8.0.jar",
+    "Closure" -> "https://repo1.maven.org/maven2/com/google/javascript/closure-compiler/v20240317/closure-compiler-v20240317.jar",
+    "Codec" -> "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar",
+    "Collections" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.4/commons-collections4-4.4.jar",
+    "Compress" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar",
+    "Csv" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-csv/1.11.0/commons-csv-1.11.0.jar",
+    "Gson" -> "https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar",
+    "JacksonCore" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.17.2/jackson-core-2.17.2.jar",
+    "JacksonDatabind" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.17.2/jackson-databind-2.17.2.jar",
+    "JacksonXml" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/2.17.2/jackson-dataformat-xml-2.17.2.jar",
+    "Jsoup" -> "https://repo1.maven.org/maven2/org/jsoup/jsoup/1.18.1/jsoup-1.18.1.jar",
+    "JxPath" -> "https://repo1.maven.org/maven2/commons-jxpath/commons-jxpath/1.3/commons-jxpath-1.3.jar",
+    "Lang" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar",
+    "Math" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
+    "Mockito" -> "https://repo1.maven.org/maven2/org/mockito/mockito-core/5.12.0/mockito-core-5.12.0.jar",
+    "Time" -> "https://repo1.maven.org/maven2/joda-time/joda-time/2.12.7/joda-time-2.12.7.jar"
+  )
+
+lazy val benchmarkDownloadTask = taskKey[Unit](s"Download `defects4j` candidates for benchmarking")
+benchmarkDownloadTask := {
+  defect4jDataset.value.foreach { case (name, url) => // each entry is (project name, JAR url)
+    DownloadHelper.ensureIsAvailable(url, datasetDir.value / s"$name.jar") // stored as <datasetDir>/<name>.jar
+  }
+}
diff --git a/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala b/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala
index 3b7591b1..f7b373b4 100644
--- a/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala
+++ b/drivers/gremlin/src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala
@@ -41,7 +41,7 @@ abstract class GremlinDriver(txMax: Int = 50) extends IDriver {
     * @return
     *   a Gremlin graph traversal source.
     */
-  protected def g(): GraphTraversalSource = {
+  def g(): GraphTraversalSource = {
     traversalSource match {
       case Some(conn) => conn
       case None =>
diff --git a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala
index 6dab0dc0..a58949d8 100644
--- a/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala
+++ b/drivers/neo4j-embedded/src/main/scala/com/github/plume/oss/drivers/Neo4jEmbeddedDriver.scala
@@ -6,7 +6,7 @@ import com.github.plume.oss.util.BatchedUpdateUtil.*
 import io.shiftleft.codepropertygraph.generated.nodes.StoredNode
 import org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME
 import org.neo4j.dbms.api.{DatabaseManagementService, DatabaseManagementServiceBuilder}
-import org.neo4j.graphdb.{Label, Transaction}
+import org.neo4j.graphdb.{GraphDatabaseService, Label, Transaction}
 import org.slf4j.LoggerFactory
 import overflowdb.BatchedUpdate.{CreateEdge, DiffOrBuilder, SetNodeProperty}
 import overflowdb.{BatchedUpdate, DetachedNodeData}
@@ -41,6 +41,8 @@ final class Neo4jEmbeddedDriver(
     })
   }
 
+  def graph: GraphDatabaseService = graphDb // the embedded database handle that connect() assigns
+
   private def connect(): Unit = {
     managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build()
     graphDb = managementService.database(databaseName)
diff --git a/project/DownloadHelper.scala b/project/DownloadHelper.scala
new file mode 100644
index 00000000..da86faf0
--- /dev/null
+++ b/project/DownloadHelper.scala
@@ -0,0 +1,48 @@
+import java.io.File
+import java.net.URI
+import java.nio.file.{Files, Path, Paths}
+
+object DownloadHelper {
+  val LocalStorageDir = Paths.get(".local/source-urls")
+
+  /** Downloads the remote file from the given url if either
+   * - the localFile is not available,
+   * - or the url is different from the previously downloaded file
+   * - or we don't have the original url from the previously downloaded file
+   * We store the information about the previously downloaded urls and the localFile in `.local`
+   * The stored url is discarded before a download starts and written back only once it has completed.
+   */
+  def ensureIsAvailable(url: String, localFile: File): Unit = {
+    if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) {
+      val storageFile = storageInfoFileFor(localFile)
+      // forget the old url first, so a download that fails midway is never mistaken for a finished one later
+      Files.deleteIfExists(storageFile)
+      Files.deleteIfExists(localFile.toPath)
+
+      println(s"[INFO] downloading $url to $localFile")
+      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
+        sbt.IO.transfer(inputStream, localFile)
+      }
+
+      // persist url in local storage
+      Files.createDirectories(storageFile.getParent)
+      Files.writeString(storageFile, url)
+    }
+  }
+
+  /** `path` relative to the current working directory (presumably the project root when run from sbt). */
+  private def relativePathToProjectRoot(path: Path): String =
+    Paths.get("").toAbsolutePath.normalize().relativize(path.toAbsolutePath).toString
+
+  /** The url recorded for `localFile` by an earlier download, if a non-empty record exists. */
+  private def previousUrlForLocalFile(localFile: File): Option[String] = {
+    Option(storageInfoFileFor(localFile))
+      .filter(Files.exists(_))
+      .map(Files.readString)
+      .filter(_.nonEmpty)
+  }
+
+  /** Location of the url record for `localFile`: its relative path, re-rooted under `LocalStorageDir`. */
+  private def storageInfoFileFor(localFile: File): Path =
+    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
+}
diff --git a/runBenchmarks.sc b/runBenchmarks.sc
new file mode 100644
index 00000000..49607ce8
--- /dev/null
+++ b/runBenchmarks.sc
@@ -0,0 +1,38 @@
+import scala.sys.process.*
+import java.nio.file.Files
+import java.nio.file.Path
+import scala.jdk.CollectionConverters.*
+
+@main def main(): Unit = {
+  println("[info] Ensuring compilation status and benchmark dataset availability...")
+  val setupExitCode = "sbt compile benchmarkDownloadTask".!
+  if (setupExitCode != 0) sys.error(s"'sbt compile benchmarkDownloadTask' failed with exit code $setupExitCode")
+
+  val datasetDir = Path.of("workspace", "defects4j")
+  val resultsDir = Path.of("results")
+  Files.createDirectories(resultsDir) // no-op if the directory already exists
+
+  // The sbt command that benchmarks `driver` on the JAR `project`; output and result files are named after both.
+  def benchmarkArgs(driver: String, project: String): String = {
+    val projectDir = Path.of(datasetDir.toString, project)
+    val projectName = project.toLowerCase.stripSuffix(".jar")
+    val resultsPath = Path.of(resultsDir.toString, s"results-$driver-$projectName")
+    val outputPath = Path.of(resultsDir.toString, s"output-$driver-$projectName")
+    s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath}"
+  }
+
+  println("[info] Available projects:")
+  // `Files.list` keeps the directory open until its stream is closed, so collect the entries and close it.
+  val projects = scala.util.Using.resource(Files.list(datasetDir))(_.filter(_.toString.endsWith(".jar")).toList.asScala.toList)
+  projects.foreach(p => println(s" - ${p.getFileName.toString}"))
+
+  println("[info] Available drivers:")
+  val drivers = Seq("overflowdb")
+  drivers.foreach(d => println(s" - $d"))
+
+  for (driver <- drivers; project <- projects) {
+    println(s"[info] Benchmarking '$driver' on project '$project'")
+    val exitCode = Seq("sbt", benchmarkArgs(driver, project.getFileName.toString)).! // one argument, no quoting needed
+    if (exitCode != 0) println(s"[warn] Benchmarking '$driver' on '$project' exited with code $exitCode")
+  }
+}
diff --git a/src/main/scala/com/github/plume/oss/Benchmark.scala b/src/main/scala/com/github/plume/oss/Benchmark.scala
index a2ff671d..8538bfdb 100644
--- a/src/main/scala/com/github/plume/oss/Benchmark.scala
+++ b/src/main/scala/com/github/plume/oss/Benchmark.scala
@@ -1,17 +1,22 @@
 package com.github.plume.oss
 
-import com.github.plume.oss.Benchmark.BenchmarkType.WRITE
-import com.github.plume.oss.drivers.IDriver
-import io.joern.jimple2cpg.Config
+import better.files.File
+import com.github.plume.oss.Benchmark.BenchmarkType.*
+import com.github.plume.oss.benchmarking.{
+  GraphWriteBenchmark,
+  Neo4jEmbedReadBenchmark,
+  OverflowDbReadBenchmark,
+  TinkerGraphReadBenchmark
+}
+import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
 import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler
-import org.openjdk.jmh.annotations.{Benchmark, Level, Mode, Param, Scope, Setup, State, TearDown}
-import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+import org.openjdk.jmh.annotations.Mode
 import org.openjdk.jmh.runner.Runner
 import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue}
 import upickle.default.*
 
+import java.util
 import java.util.concurrent.TimeUnit
-import scala.compiletime.uninitialized
 
 object Benchmark {
 
@@ -28,13 +33,36 @@ object Benchmark {
           s"Finished WRITE JMH benchmarks. Results: ${config.jmhResultFile}-WRITE.csv; Output: ${config.jmhOutputFile}-WRITE.csv"
         )
 
-//      val readOptsBenchmark = createOptionsBoilerPlate(config, READ)
-//        .include(classOf[OverflowDbBenchmark].getSimpleName)
-//        .build()
-//      new Runner(readOptsBenchmark).run()
-//      println(
-//        s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
-//      )
+        val readOptsBenchmark = config.dbConfig match {
+          case _: TinkerGraphConfig =>
+            Option(
+              createOptionsBoilerPlate(config, READ)
+                .include(classOf[TinkerGraphReadBenchmark].getSimpleName)
+                .build()
+            )
+          case _: OverflowDbConfig =>
+            Option(
+              createOptionsBoilerPlate(config, READ)
+                .include(classOf[OverflowDbReadBenchmark].getSimpleName)
+                .build()
+            )
+          case _: Neo4jEmbeddedConfig =>
+            Option(
+              createOptionsBoilerPlate(config, READ)
+                .include(classOf[Neo4jEmbedReadBenchmark].getSimpleName)
+                .build()
+            )
+          case x =>
+            println(s"Read benchmarks are not available for ${x.getClass.getSimpleName}, skipping...")
+            Option.empty
+        }
+        readOptsBenchmark.foreach { opts =>
+          new Runner(opts).run()
+          println(
+            s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
+          )
+        }
+
       }
   }
 
@@ -58,105 +86,33 @@ object Benchmark {
     case READ, WRITE
   }
 
-}
-
-@State(Scope.Benchmark)
-class GraphWriteBenchmark {
-
-  @Param(Array(""))
-  var configStr: String = ""
-  var config: PlumeConfig =
-    if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
-  var driver: IDriver = uninitialized
-
-  @Setup
-  def setupBenchmark(params: BenchmarkParams): Unit = {
-    config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
-    driver = config.dbConfig.toDriver
-  }
-
-  @Setup(Level.Iteration)
-  def clearDriver(params: BenchmarkParams): Unit = {
-    driver.clear()
-  }
-
-  @Benchmark
-  def createAst(blackhole: Blackhole): Unit = {
-    JimpleAst2Database(driver).createAst(Config().withInputPath(config.inputDir))
-    Option(blackhole).foreach(_.consume(driver))
-  }
-
-  @TearDown
-  def cleanupBenchmark(): Unit = {
-    driver.clear()
-    driver.close()
-  }
-
-}
-
-sealed trait GraphReadBenchmark[D <: IDriver](protected val driver: D) {
-
-  private var nodeStart: Array[Long]   = new Array[Long](0)
-  private var fullNames: Array[String] = uninitialized
+  /** Parses `configStr` (blank means defaults) and creates the matching driver. With `useCachedGraph = false`,
+    * storage left by an earlier run is deleted first; otherwise an existing TinkerGraph import file is loaded. */
+  def initializeDriverAndInputDir(configStr: String, useCachedGraph: Boolean): (IDriver, PlumeConfig) = {
+    val config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
+    if (!useCachedGraph) {
+      // Remove whatever the configured backend has on disk; deletion is requested with `swallowIOExceptions`.
+      config.dbConfig match {
+        case OverflowDbConfig(storageLocation, _, _) =>
+          File(storageLocation).delete(swallowIOExceptions = true)
+        case TinkerGraphConfig(Some(importPath), _) =>
+          File(importPath).delete(swallowIOExceptions = true)
+        case Neo4jEmbeddedConfig(_, databaseDir, _) =>
+          File(databaseDir).delete(swallowIOExceptions = true)
+        case _ =>
+      }
+    }
+
+    val driver = config.dbConfig match {
+      // Only a TinkerGraph whose import file exists is loaded explicitly; every other case just opens the driver.
+      case TinkerGraphConfig(Some(importPath), _) if useCachedGraph && File(importPath).exists =>
+        val tinkerGraph = config.dbConfig.toDriver.asInstanceOf[TinkerGraphDriver]
+        tinkerGraph.importGraph(importPath)
+        tinkerGraph
+      case _ => config.dbConfig.toDriver
+    }
 
-  @Setup
-  def setupFun(params: BenchmarkParams): Unit = {
-    params.getBenchmark
+    driver -> config
   }
 
-  @Benchmark
-  def astDFS(blackhole: Blackhole): Int
-
-  @Benchmark
-  def astUp(blackhole: Blackhole): Int
-
-  @Benchmark
-  def orderSumChecked(blackhole: Blackhole): Int
-
-  @Benchmark
-  def orderSumUnchecked(blackhole: Blackhole): Int
-
-  @Benchmark
-  def orderSumExplicit(blackhole: Blackhole): Int
-
-  @Benchmark
-  def callOrderTrav(blackhole: Blackhole): Int
-
-  @Benchmark
-  def callOrderExplicit(blackhole: Blackhole): Int
-
-  @Benchmark
-  def indexedMethodFullName(bh: Blackhole): Unit
-
-  @Benchmark
-  def unindexedMethodFullName(bh: Blackhole): Unit
-
 }
-
-//@State(Scope.Benchmark)
-//class OverflowDbBenchmark(config: OverflowDbConfig)
-//    extends GraphReadBenchmark(
-//    ) {
-//
-//  override def createAst(blackhole: Blackhole): Int = {
-//    0
-//  }
-//
-//  override def astDFS(blackhole: Blackhole): Int = ???
-//
-//  override def astUp(blackhole: Blackhole): Int = ???
-//
-//  override def orderSumChecked(blackhole: Blackhole): Int = ???
-//
-//  override def orderSumUnchecked(blackhole: Blackhole): Int = ???
-//
-//  override def orderSumExplicit(blackhole: Blackhole): Int = ???
-//
-//  override def callOrderTrav(blackhole: Blackhole): Int = ???
-//
-//  override def callOrderExplicit(blackhole: Blackhole): Int = ???
-//
-//  override def indexedMethodFullName(bh: Blackhole): Unit = ???
-//
-//  override def unindexedMethodFullName(bh: Blackhole): Unit = ???
-//}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala
new file mode 100644
index 00000000..559c74cc
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphReadBenchmark.scala
@@ -0,0 +1,88 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss
+import com.github.plume.oss.{Benchmark, JimpleAst2Database, PlumeConfig, TinkerGraphConfig}
+import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
+import io.joern.jimple2cpg.Config
+import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+
+import java.util.concurrent.TimeUnit
+import scala.compiletime.uninitialized
+
+@State(Scope.Benchmark)
+@Timeout(2, TimeUnit.MINUTES)
+trait GraphReadBenchmark { // setup and benchmark contract shared by the per-backend read benchmarks
+
+  @Param(Array(""))
+  protected var configStr: String        = "" // serialized PlumeConfig handed to initializeDriverAndInputDir
+  protected var config: PlumeConfig      = uninitialized
+  protected var nodeStart: Array[Long]   = new Array[Long](0) // node ids the node-based benchmarks start from
+  protected var fullNames: Array[String] = uninitialized // names looked up by the *MethodFullName benchmarks
+  protected var driver: IDriver          = uninitialized
+
+  protected def setupBenchmarkParams(params: BenchmarkParams): Unit = { // prepares only what the named benchmark needs
+    params.getBenchmark match { // NOTE(review): a benchmark name matching none of the cases raises a MatchError
+      case name if name.endsWith("astDFS") =>
+        nodeStart = setupAstDfs()
+      case name if name.endsWith("astUp") =>
+        nodeStart = setupAstUp()
+      case name if name.contains("orderSum") =>
+        nodeStart = setUpOrderSum()
+      case name if name.contains("callOrder") =>
+        nodeStart = setUpCallOrder()
+      case name if name.contains("MethodFullName") =>
+        fullNames = setUpMethodFullName()
+    }
+  }
+
+  protected def setupBenchmark(params: BenchmarkParams): Unit = { // opens the driver, building the graph if needed
+    val (driver_, config_) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = true)
+    driver = driver_
+    config = config_
+    if (!driver.exists(1L)) { // presumably: no node with id 1 means the graph has not been built yet
+      JimpleAst2Database(driver).createAst(Config().withInputPath(config_.inputDir))
+      config.dbConfig match {
+        case TinkerGraphConfig(_, Some(exportPath)) => driver.asInstanceOf[TinkerGraphDriver].exportGraph(exportPath)
+        case _                                      =>
+      }
+    }
+  }
+
+  protected def setupAstDfs(): Array[Long] // start ids for astDFS
+
+  protected def setupAstUp(): Array[Long] // start ids for astUp
+
+  protected def setUpOrderSum(): Array[Long] // start ids for orderSum
+
+  protected def setUpCallOrder(): Array[Long] // start ids for callOrderTrav and callOrderExplicit
+
+  protected def setUpMethodFullName(): Array[String] // names for indexedMethodFullName and unindexedMethodFullName
+
+  @Benchmark
+  def astDFS(blackhole: Blackhole): Int
+
+  @Benchmark
+  def astUp(blackhole: Blackhole): Int
+
+  @Benchmark
+  def orderSum(blackhole: Blackhole): Int
+
+  @Benchmark
+  def callOrderTrav(blackhole: Blackhole): Int
+
+  @Benchmark
+  def callOrderExplicit(blackhole: Blackhole): Int
+
+  @Benchmark
+  def indexedMethodFullName(bh: Blackhole): Unit
+
+  @Benchmark
+  def unindexedMethodFullName(bh: Blackhole): Unit
+
+  @TearDown
+  def cleanupBenchmark(): Unit = { // closes the driver but does not clear it
+    driver.close()
+  }
+
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala
new file mode 100644
index 00000000..a887faea
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/GraphWriteBenchmark.scala
@@ -0,0 +1,46 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss
+import com.github.plume.oss.{Benchmark, JimpleAst2Database}
+import com.github.plume.oss.drivers.IDriver
+import io.joern.jimple2cpg.Config
+import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+
+import java.util.concurrent.TimeUnit
+import scala.compiletime.uninitialized
+
+@State(Scope.Benchmark)
+@Timeout(5, TimeUnit.MINUTES)
+class GraphWriteBenchmark { // measures writing the AST of the configured input through the configured driver
+
+  @Param(Array(""))
+  var configStr: String        = "" // serialized PlumeConfig handed to initializeDriverAndInputDir
+  private var driver: IDriver  = uninitialized
+  private var inputDir: String = uninitialized
+
+  @Setup
+  def setupBenchmark(params: BenchmarkParams): Unit = { // opens the driver without reusing a cached graph
+    val (driver_, config) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = false)
+    driver = driver_
+    inputDir = config.inputDir
+  }
+
+  @Setup(Level.Iteration)
+  def clearDriver(params: BenchmarkParams): Unit = { // clears the driver before every iteration
+    driver.clear()
+  }
+
+  @Benchmark
+  def createAst(blackhole: Blackhole): Unit = {
+    JimpleAst2Database(driver).createAst(Config().withInputPath(inputDir))
+    Option(blackhole).foreach(_.consume(driver)) // tolerates a null blackhole
+  }
+
+  @TearDown
+  def cleanupBenchmark(): Unit = { // clears the driver, then closes it
+    driver.clear()
+    driver.close()
+  }
+
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
new file mode 100644
index 00000000..985f9cc9
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/Neo4jEmbedReadBenchmark.scala
@@ -0,0 +1,254 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss.drivers.Neo4jEmbeddedDriver
+import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
+import io.shiftleft.codepropertygraph.generated.NodeTypes.{CALL, METHOD}
+import io.shiftleft.codepropertygraph.generated.PropertyNames.{FULL_NAME, ORDER}
+import org.neo4j.graphdb.GraphDatabaseService
+import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+import overflowdb.traversal.*
+
+import java.util
+import scala.compiletime.uninitialized
+import scala.jdk.CollectionConverters.*
+import scala.util.{Random, Using}
+
+/** Read benchmarks for the embedded Neo4j backend: every start set and every measured read is a Cypher query run
+  * through the [[GraphDatabaseService]] exposed by [[Neo4jEmbeddedDriver]]. */
+@State(Scope.Benchmark)
+class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
+
+  private var g: GraphDatabaseService = uninitialized
+
+  @Setup
+  override def setupBenchmark(params: BenchmarkParams): Unit = {
+    super.setupBenchmark(params)
+    g = driver.asInstanceOf[Neo4jEmbeddedDriver].graph
+    setupBenchmarkParams(params) // runs queries through `g`, so it must come after the assignment above
+  }
+
+  /** Ids of the AST roots: nodes with an outgoing but no incoming `AST` relationship. */
+  override def setupAstDfs(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+           |MATCH (n)-[:$AST]->()
+           |WHERE NOT (n)<-[:$AST]-()
+           |RETURN n.id AS ID
+           |""".stripMargin)
+        .map(_.get("ID").asInstanceOf[Long]).toArray
+    }
+  }
+
+  /** Ids of the nodes that have an outgoing `AST` relationship. */
+  override def setupAstUp(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+            |MATCH (n)-[:$AST]->()
+            |RETURN n.id AS ID
+            |""".stripMargin)
+        .map(_.get("ID").asInstanceOf[Long]).toArray
+    }
+  }
+
+  /** Ids of all nodes that carry an `ORDER` property. */
+  override def setUpOrderSum(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+            |MATCH (n)
+            |WHERE n.$ORDER IS NOT NULL
+            |RETURN n.id AS ID
+            |""".stripMargin)
+        .map(_.get("ID").asInstanceOf[Long]).toArray
+    }
+  }
+
+  /** Ids of all `CALL` nodes that carry an `ORDER` property. */
+  override def setUpCallOrder(): Array[Long] = {
+    Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+            |MATCH (n: $CALL)
+            |WHERE n.$ORDER IS NOT NULL
+            |RETURN n.id AS ID
+            |""".stripMargin)
+        .map(_.get("ID").asInstanceOf[Long]).toArray
+    }
+  }
+
+  /** Up to 1000 `METHOD` full names, taken from a shuffle with a fixed seed. */
+  override def setUpMethodFullName(): Array[String] = {
+    val fullNames_ = Using.resource(g.beginTx) { tx =>
+      tx.execute(s"""
+            |MATCH (n: $METHOD)
+            |WHERE n.$FULL_NAME IS NOT NULL
+            |RETURN n.$FULL_NAME as $FULL_NAME
+            |""".stripMargin)
+        .map(_.get(FULL_NAME).asInstanceOf[String]).toArray
+    }
+    fullNames = new Random(1234).shuffle(fullNames_).toArray
+    fullNames.slice(0, math.min(1000, fullNames.length))
+  }
+
+  /** Depth-first walk along outgoing `AST` relationships from every start node, one query per visited node. */
+  @Benchmark
+  override def astDFS(blackhole: Blackhole): Int = {
+    val stack = scala.collection.mutable.ArrayDeque.empty[Long]
+    stack.addAll(nodeStart)
+    var nnodes = nodeStart.length
+    while (stack.nonEmpty) {
+      val childrenIds = Using.resource(g.beginTx) { tx =>
+        // `[:AST]` restricts the match to AST relationships; a bare `[AST]` merely names a relationship variable.
+        tx.execute(
+          s"""
+               |MATCH (n)-[:$AST]->(m)
+               |WHERE n.id = $$nodeId
+               |RETURN m.id AS ID
+               |""".stripMargin,
+          new util.HashMap[String, Object](1) {
+            put("nodeId", stack.removeLast().asInstanceOf[Object])
+          }
+        ).map(_.get("ID").asInstanceOf[Long]).toArray
+      }
+      stack.appendAll(childrenIds)
+      nnodes += 1
+    }
+    Option(blackhole).foreach(_.consume(nnodes))
+    nnodes
+  }
+
+  /** For every start node, follows incoming `AST` relationships until there is none left and sums the steps taken. */
+  @Benchmark
+  override def astUp(blackhole: Blackhole): Int = {
+    var sumDepth = 0
+    for (node <- nodeStart) {
+      var nodeId = node
+      def getResult = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+               |MATCH (n)<-[:$AST]-(m)
+               |WHERE n.id = $$nodeId
+               |RETURN m.id AS ID
+               |""".stripMargin,
+          new util.HashMap[String, Object](1) {
+            put("nodeId", nodeId.asInstanceOf[Object])
+          }
+        ).map(_.get("ID").asInstanceOf[Long]).toArray
+      }
+      var result  = getResult
+      def hasNext = result.nonEmpty
+      while (hasNext) {
+        sumDepth += 1
+        nodeId = result.head
+        result = getResult
+      }
+    }
+    Option(blackhole).foreach(_.consume(sumDepth))
+    sumDepth
+  }
+
+  /** Sums the `ORDER` property over all start nodes, fetching each node by id in a query of its own. */
+  @Benchmark
+  override def orderSum(blackhole: Blackhole): Int = {
+    var sumOrder = 0
+    for (nodeId <- nodeStart) {
+      val orderArr = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+               |MATCH (n)
+               |WHERE n.id = $$nodeId
+               |RETURN n.$ORDER AS $ORDER
+               |""".stripMargin,
+          new util.HashMap[String, Object](1) {
+            put("nodeId", nodeId.asInstanceOf[Object])
+          }
+        ).map(_.get(ORDER).asInstanceOf[Number].intValue()).toArray // the value may arrive boxed as a Long
+      }
+      sumOrder += orderArr.head
+    }
+    Option(blackhole).foreach(_.consume(sumOrder))
+    sumOrder
+  }
+
+  /** Counts, inside a single query, the start `CALL` nodes whose `ORDER` is greater than 2. */
+  @Benchmark
+  override def callOrderTrav(blackhole: Blackhole): Int = {
+    val res = Using.resource(g.beginTx) { tx =>
+      tx.execute(
+        s"""
+             |MATCH (n: $CALL)
+             |WHERE n.$ORDER > 2 AND n.id IN $$nodeIds
+             |RETURN COUNT(n) AS SIZE
+             |""".stripMargin,
+        new util.HashMap[String, Object](1) {
+          put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object])
+        }
+      ).map(_.get("SIZE").asInstanceOf[Number].intValue()).next() // COUNT yields a Long, not an Int
+    }
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Fetches `ORDER` of every start `CALL` node and counts the values greater than 2 on the client side. */
+  @Benchmark
+  override def callOrderExplicit(blackhole: Blackhole): Int = {
+    var res = 0
+    val nodes = Using.resource(g.beginTx) { tx =>
+      tx.execute(
+        s"""
+             |MATCH (n: $CALL)
+             |WHERE n.id IN $$nodeIds
+             |RETURN n.$ORDER as $ORDER
+             |""".stripMargin,
+        new util.HashMap[String, Object](1) {
+          put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object])
+        }
+      ).map(_.get(ORDER).asInstanceOf[Number].intValue()).toArray
+    }
+    for (order <- nodes) {
+      if (order > 2) res += 1
+    }
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Looks up `METHOD` nodes by full name, with the label given in the match pattern. */
+  @Benchmark
+  override def indexedMethodFullName(bh: Blackhole): Unit = {
+    fullNames.foreach { fullName =>
+      val nodes = Using.resource(g.beginTx) { tx =>
+        tx.execute(
+          s"""
+               |MATCH (n: $METHOD)
+               |WHERE n.$FULL_NAME = $$fullName
+               |RETURN n AS NODE
+               |""".stripMargin,
+          new util.HashMap[String, Object](1) {
+            put("fullName", fullName.asInstanceOf[Object])
+          }
+        ).map(_.get("NODE")).toArray
+      }
+      nodes.foreach(bh.consume)
+    }
+  }
+
+  /** Looks up `METHOD` nodes by full name, matching any node and testing its labels in the `WHERE` clause. */
+  @Benchmark
+  override def unindexedMethodFullName(bh: Blackhole): Unit = {
+    fullNames.foreach { fullName =>
+      val nodes = Using.resource(g.beginTx) { tx =>
+        // The label has to be a quoted string literal here: unquoted, Cypher reads it as an undefined variable.
+        tx.execute(
+          s"""
+               |MATCH (n)
+               |WHERE n.$FULL_NAME = $$fullName and '$METHOD' IN labels(n)
+               |RETURN n AS NODE
+               |""".stripMargin,
+          new util.HashMap[String, Object](1) {
+            put("fullName", fullName.asInstanceOf[Object])
+          }
+        ).map(_.get("NODE")).toArray
+      }
+      nodes.foreach(bh.consume)
+    }
+  }
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala
new file mode 100644
index 00000000..e65c62ff
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/OverflowDbReadBenchmark.scala
@@ -0,0 +1,122 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss.benchmarking.GraphReadBenchmark
+import com.github.plume.oss.drivers.OverflowDbDriver
+import io.shiftleft.codepropertygraph.generated.Cpg
+import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
+import io.shiftleft.codepropertygraph.generated.PropertyNames.ORDER
+import io.shiftleft.codepropertygraph.generated.nodes.{Call, StoredNode}
+import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+import overflowdb.PropertyKey
+import overflowdb.traversal.*
+import scala.compiletime.uninitialized
+import scala.util.Random
+import io.shiftleft.semanticcpg.language.*
+
+/** JMH read benchmarks that run directly against the `Cpg` obtained from an `OverflowDbDriver`. */
+@State(Scope.Benchmark)
+class OverflowDbReadBenchmark extends GraphReadBenchmark {
+
+  private var cpg: Cpg = uninitialized
+
+  /** Runs the parent setup, captures the driver's graph, then delegates to `setupBenchmarkParams`. */
+  @Setup
+  override def setupBenchmark(params: BenchmarkParams): Unit = {
+    super.setupBenchmark(params)
+    cpg = driver.asInstanceOf[OverflowDbDriver].cpg
+    setupBenchmarkParams(params)
+  }
+
+  /** IDs of the nodes that have at least one outgoing and no incoming `AST` neighbour. */
+  override def setupAstDfs(): Array[Long] =
+    cpg.graph.nodes.iterator.filter(n => n.in(AST).isEmpty && n.out(AST).nonEmpty).map(_.id()).toArray
+
+  /** IDs of every node in the graph. */
+  override def setupAstUp(): Array[Long] =
+    (for (n <- cpg.graph.nodes.iterator) yield n.id()).toArray
+
+  /** IDs of the nodes whose property map contains `ORDER`. */
+  override def setUpOrderSum(): Array[Long] =
+    cpg.graph.nodes.iterator.collect { case n if n.propertiesMap().containsKey(ORDER) => n.id() }.toArray
+
+  /** IDs of all `Call` nodes. */
+  override def setUpCallOrder(): Array[Long] =
+    cpg.graph.nodes.iterator.collect { case call: Call => call.id() }.toArray
+
+  /** Stores all method full names, shuffled with a fixed seed, in `fullNames`; returns at most 1000. */
+  override def setUpMethodFullName(): Array[String] = {
+    val rng = new Random(1234)
+    fullNames = rng.shuffle(cpg.method.fullName.iterator).toArray
+    fullNames.take(1000)
+  }
+
+  /** Depth-first walk over `_astOut` from every start node, driven by an explicit stack of node IDs. */
+  @Benchmark
+  override def astDFS(blackhole: Blackhole): Int = {
+    val pending = scala.collection.mutable.ArrayDeque.empty[Long]
+    pending ++= nodeStart
+    // The counter starts at the number of start nodes and is bumped again for every popped node.
+    var visited = nodeStart.length
+    while (pending.nonEmpty) {
+      val current = cpg.graph.node(pending.removeLast()).asInstanceOf[StoredNode]
+      pending ++= current._astOut.map(_.id)
+      visited += 1
+    }
+    for (bh <- Option(blackhole)) bh.consume(visited)
+    visited
+  }
+
+  /** Follows the first `_astIn` neighbour upwards from every start node, counting each node visited. */
+  @Benchmark
+  override def astUp(blackhole: Blackhole): Int = {
+    var depthTotal = 0
+    nodeStart.foreach { startId =>
+      var current = cpg.graph.node(startId)
+      while (current != null) {
+        depthTotal += 1
+        current = current.asInstanceOf[StoredNode]._astIn.nextOption.orNull
+      }
+    }
+    for (bh <- Option(blackhole)) bh.consume(depthTotal)
+    depthTotal
+  }
+
+  /** Sums the `ORDER` property over all start nodes. */
+  @Benchmark
+  override def orderSum(blackhole: Blackhole): Int = {
+    val orderKey = PropertyKey[Int](ORDER)
+    var total    = 0
+    nodeStart.map(cpg.graph.node).foreach(n => total += n.asInstanceOf[StoredNode].property(orderKey))
+    for (bh <- Option(blackhole)) bh.consume(total)
+    total
+  }
+
+  /** Counts the start nodes that pass the `orderGt(2)` traversal step. */
+  @Benchmark
+  override def callOrderTrav(blackhole: Blackhole): Int = {
+    val calls   = cpg.graph.nodes(nodeStart*).iterator.asInstanceOf[Iterator[Call]]
+    val matched = calls.orderGt(2).size
+    for (bh <- Option(blackhole)) bh.consume(matched)
+    matched
+  }
+
+  /** Counts the start nodes whose `order` exceeds 2 by testing each node directly. */
+  @Benchmark
+  override def callOrderExplicit(blackhole: Blackhole): Int = {
+    val calls   = cpg.graph.nodes(nodeStart*).iterator.asInstanceOf[Iterator[Call]]
+    val matched = calls.count(_.order > 2)
+    for (bh <- Option(blackhole)) bh.consume(matched)
+    matched
+  }
+
+  /** Resolves every sampled full name with `fullNameExact` applied straight to `cpg.method`. */
+  @Benchmark
+  override def indexedMethodFullName(bh: Blackhole): Unit =
+    for (name <- fullNames; method <- cpg.method.fullNameExact(name)) bh.consume(method)
+
+  /** Same lookup, but with a pass-through `filter` step placed in front of `fullNameExact`. */
+  @Benchmark
+  override def unindexedMethodFullName(bh: Blackhole): Unit =
+    fullNames.foreach(name => cpg.method.filter { _ => true }.fullNameExact(name).foreach(bh.consume))
+}
diff --git a/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala
new file mode 100644
index 00000000..999c2516
--- /dev/null
+++ b/src/main/scala/com/github/plume/oss/benchmarking/TinkerGraphReadBenchmark.scala
@@ -0,0 +1,121 @@
+package com.github.plume.oss.benchmarking
+
+import com.github.plume.oss.benchmarking.GraphReadBenchmark
+import com.github.plume.oss.drivers.TinkerGraphDriver
+import io.shiftleft.codepropertygraph.generated.EdgeTypes.AST
+import io.shiftleft.codepropertygraph.generated.NodeTypes.{CALL, METHOD}
+import io.shiftleft.codepropertygraph.generated.PropertyNames.{FULL_NAME, ORDER}
+import org.apache.tinkerpop.gremlin.process.traversal.P
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.{GraphTraversalSource, __}
+import org.openjdk.jmh.annotations.{Benchmark, Scope, Setup, State}
+import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
+
+import scala.jdk.CollectionConverters.*
+import scala.compiletime.uninitialized
+import scala.util.Random
+import overflowdb.traversal.*
+
+/** JMH read benchmarks expressed as Gremlin traversals over the graph of a `TinkerGraphDriver`. */
+@State(Scope.Benchmark)
+class TinkerGraphReadBenchmark extends GraphReadBenchmark {
+
+  private var g: () => GraphTraversalSource = uninitialized
+
+  /** Runs the parent setup, points `g` at the driver's traversal source, then calls `setupBenchmarkParams`. */
+  @Setup
+  override def setupBenchmark(params: BenchmarkParams): Unit = {
+    super.setupBenchmark(params)
+    g = () => driver.asInstanceOf[TinkerGraphDriver].g()
+    setupBenchmarkParams(params)
+  }
+
+  /** IDs of the vertices with at least one outgoing and no incoming `AST` edge. */
+  override def setupAstDfs(): Array[Long] =
+    g().V().where(__.and(__.not(__.inE(AST)), __.outE(AST))).id().asScala.map(_.asInstanceOf[Long]).toArray
+
+  /** IDs of every vertex in the graph. */
+  override def setupAstUp(): Array[Long] =
+    g().V().id().asScala.map(_.asInstanceOf[Long]).toArray
+
+  /** IDs of the vertices that have an `ORDER` property. */
+  override def setUpOrderSum(): Array[Long] =
+    g().V().has(ORDER).id().asScala.map(_.asInstanceOf[Long]).toArray
+
+  /** IDs of the vertices labelled `CALL`. */
+  override def setUpCallOrder(): Array[Long] =
+    g().V().hasLabel(CALL).id().asScala.map(_.asInstanceOf[Long]).toArray
+
+  /** Stores the `METHOD` full names, shuffled with a fixed seed, in `fullNames`; returns at most 1000. */
+  override def setUpMethodFullName(): Array[String] = {
+    fullNames = new Random(1234).shuffle(g().V().hasLabel(METHOD).properties(FULL_NAME).value()).toArray
+    fullNames.take(1000)
+  }
+
+  /** Depth-first walk over outgoing `AST` edges from every start vertex, driven by a stack of IDs. */
+  @Benchmark
+  override def astDFS(blackhole: Blackhole): Int = {
+    val stack = scala.collection.mutable.ArrayDeque.empty[Long]
+    stack.addAll(nodeStart)
+    var nnodes = nodeStart.length
+    while (stack.nonEmpty) {
+      // `asScala` must come before `map`: GraphTraversal's own `map` is a Gremlin step whose function
+      // is handed a Traverser, not the ID, so casting its argument to Long would fail at runtime.
+      stack.appendAll(g().V(stack.removeLast()).out(AST).id().asScala.map(_.asInstanceOf[Long]))
+      nnodes += 1
+    }
+    Option(blackhole).foreach(_.consume(nnodes))
+    nnodes
+  }
+
+  /** Climbs incoming `AST` edges from every start vertex, one traversal per step, counting the steps. */
+  @Benchmark
+  override def astUp(blackhole: Blackhole): Int = {
+    var sumDepth = 0
+    for (node <- nodeStart) {
+      var parents = g().V(node).in(AST).id()
+      while (parents.hasNext) {
+        sumDepth += 1
+        parents = g().V(parents.next()).in(AST).id()
+      }
+    }
+    Option(blackhole).foreach(_.consume(sumDepth))
+    sumDepth
+  }
+
+  /** Sums the `ORDER` property over all start vertices. */
+  @Benchmark
+  override def orderSum(blackhole: Blackhole): Int = {
+    var sumOrder = 0
+    for (node <- nodeStart.map(g().V(_))) sumOrder += node.properties(ORDER).value().next().asInstanceOf[Int]
+    Option(blackhole).foreach(_.consume(sumOrder))
+    sumOrder
+  }
+
+  /** Counts the `CALL` start vertices with `ORDER` above 2 using a single filtering traversal. */
+  @Benchmark
+  override def callOrderTrav(blackhole: Blackhole): Int = {
+    val res = g().V(nodeStart*).hasLabel(CALL).has(ORDER, P.gt(2)).size
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Counts the `CALL` start vertices with `ORDER` above 2 by inspecting each vertex's property value. */
+  @Benchmark
+  override def callOrderExplicit(blackhole: Blackhole): Int = {
+    var res = 0
+    // `property` yields the property wrapper, hence the unwrap; vertices lacking ORDER count as 0.
+    for (node <- g().V(nodeStart*).hasLabel(CALL)) if (node.property[Int](ORDER).orElse(0) > 2) res += 1
+    Option(blackhole).foreach(_.consume(res))
+    res
+  }
+
+  /** Looks up every sampled full name with a direct `has` step on `METHOD` vertices. */
+  @Benchmark
+  override def indexedMethodFullName(bh: Blackhole): Unit =
+    fullNames.foreach(fullName => g().V().hasLabel(METHOD).has(FULL_NAME, fullName).foreach(bh.consume))
+
+  /** Looks up every sampled full name with the `has` predicate wrapped in a `where` step. */
+  @Benchmark
+  override def unindexedMethodFullName(bh: Blackhole): Unit =
+    for (str <- fullNames; found <- g().V().hasLabel(METHOD).where(__.has(FULL_NAME, str))) bh.consume(found)
+}