Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph Read Benchmarks #257

Merged
merged 9 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ graph.xml
gsql_client.*
*.txt
*.csv
/workspace
/results

# Ignore Gradle GUI config
gradle-app.setting
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ database backends. While the binary explains the available functions, the execut
Jmh/runMain com.github.plume.oss.Benchmark overflowdb testprogram -o output -r results --storage-location test.cpg
```

An automated script to run the benchmarks versus programs from the `defects4j` dataset is available under
`runBenchmarks.sc`, which can be executed with:
```bash
scala runBenchmarks.sc
```

## Logging

Plume uses [SLF4J](http://www.slf4j.org/) as the logging facade.
Expand Down
40 changes: 38 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ libraryDependencies ++= Seq(
"org.openjdk.jmh" % "jmh-generator-reflection" % Versions.jmh,
"org.openjdk.jmh" % "jmh-generator-asm" % Versions.jmh,
"org.slf4j" % "slf4j-api" % Versions.slf4j,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.scalatest" %% "scalatest" % Versions.scalatest % Test
)

Expand All @@ -64,3 +64,39 @@ developers := List(
Global / onChangedBuildSource := ReloadOnSourceChanges

publishMavenStyle := true

// Benchmark Tasks

// These are constant configuration values, so they are declared as settings
// (evaluated once at project load) rather than tasks (re-evaluated on every
// invocation).
lazy val datasetDir = settingKey[File]("Dataset directory")
datasetDir := baseDirectory.value / "workspace" / "defects4j"
lazy val driversToBenchmark = settingKey[Seq[String]]("Drivers to benchmark")
driversToBenchmark := Seq("overflowdb", "tinkergraph", "neo4j-embedded")

// (project name -> release JAR URL) pairs mirroring the `defects4j` dataset.
lazy val defect4jDataset = settingKey[Seq[(String, String)]]("JARs for projects used in `defects4j`")
defect4jDataset :=
  Seq(
    "Chart"            -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar",
    "Cli"              -> "https://repo1.maven.org/maven2/commons-cli/commons-cli/1.8.0/commons-cli-1.8.0.jar",
    "Closure"          -> "https://repo1.maven.org/maven2/com/google/javascript/closure-compiler/v20240317/closure-compiler-v20240317.jar",
    "Codec"            -> "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar",
    "Collections"      -> "https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.4/commons-collections4-4.4.jar",
    "Compress"         -> "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar",
    "Csv"              -> "https://repo1.maven.org/maven2/org/apache/commons/commons-csv/1.11.0/commons-csv-1.11.0.jar",
    "Gson"             -> "https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar",
    "JacksonCore"      -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.17.2/jackson-core-2.17.2.jar",
    "JacksonDatabind"  -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.17.2/jackson-databind-2.17.2.jar",
    "JacksonXml"       -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/2.17.2/jackson-dataformat-xml-2.17.2.jar",
    "Jsoup"            -> "https://repo1.maven.org/maven2/org/jsoup/jsoup/1.18.1/jsoup-1.18.1.jar",
    "JxPath"           -> "https://repo1.maven.org/maven2/commons-jxpath/commons-jxpath/1.3/commons-jxpath-1.3.jar",
    "Lang"             -> "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar",
    "Math"             -> "https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
    "Mockito"          -> "https://repo1.maven.org/maven2/org/mockito/mockito-core/5.12.0/mockito-core-5.12.0.jar",
    "Time"             -> "https://repo1.maven.org/maven2/joda-time/joda-time/2.12.7/joda-time-2.12.7.jar"
  )

lazy val benchmarkDownloadTask = taskKey[Unit]("Download `defects4j` candidates for benchmarking")
benchmarkDownloadTask := {
  // Hoist `.value` calls out of the lambda: sbt's task macro forbids invoking
  // `.value` inside anonymous functions.
  val targetDir = datasetDir.value
  defect4jDataset.value.foreach { case (name, url) =>
    DownloadHelper.ensureIsAvailable(url, targetDir / s"$name.jar")
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ abstract class GremlinDriver(txMax: Int = 50) extends IDriver {
* @return
* a Gremlin graph traversal source.
*/
protected def g(): GraphTraversalSource = {
def g(): GraphTraversalSource = {
traversalSource match {
case Some(conn) => conn
case None =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.github.plume.oss.util.BatchedUpdateUtil.*
import io.shiftleft.codepropertygraph.generated.nodes.StoredNode
import org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME
import org.neo4j.dbms.api.{DatabaseManagementService, DatabaseManagementServiceBuilder}
import org.neo4j.graphdb.{Label, Transaction}
import org.neo4j.graphdb.{GraphDatabaseService, Label, Transaction}
import org.slf4j.LoggerFactory
import overflowdb.BatchedUpdate.{CreateEdge, DiffOrBuilder, SetNodeProperty}
import overflowdb.{BatchedUpdate, DetachedNodeData}
Expand Down Expand Up @@ -41,6 +41,8 @@ final class Neo4jEmbeddedDriver(
})
}

def graph: GraphDatabaseService = graphDb

private def connect(): Unit = {
managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build()
graphDb = managementService.database(databaseName)
Expand Down
48 changes: 48 additions & 0 deletions project/DownloadHelper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import java.io.File
import java.net.URI
import java.nio.file.{Files, Path, Paths}

object DownloadHelper {

  /** Root directory under which we persist, per downloaded file, the URL it was fetched from. */
  val LocalStorageDir = Paths.get(".local/source-urls")

  /** Downloads the remote file from the given url if either
    * - the localFile is not available,
    * - or the url is different from the previously downloaded file
    * - or we don't have the original url from the previously downloaded file
    * We store the information about the previously downloaded urls and the localFile in `.local`
    *
    * @param url       remote location of the artefact to fetch
    * @param localFile destination on disk; parent directories are created if missing
    */
  def ensureIsAvailable(url: String, localFile: File): Unit = {
    if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) {
      val localPath = localFile.toPath
      Files.deleteIfExists(localPath)
      // On a fresh checkout the destination directory (e.g. workspace/defects4j)
      // may not exist yet; the raw stream transfer below does not create it.
      Option(localPath.getParent).foreach(Files.createDirectories(_))

      println(s"[INFO] downloading $url to $localFile")
      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
        sbt.IO.transfer(inputStream, localFile)
      }

      // persist url in local storage so a changed url triggers a re-download
      val storageFile = storageInfoFileFor(localFile)
      Files.createDirectories(storageFile.getParent)
      Files.writeString(storageFile, url)
    }
  }

  // Key used to locate the stored-url marker: the file's path relative to the project root.
  private def relativePathToProjectRoot(path: Path): String =
    Paths
      .get("")
      .toAbsolutePath
      .normalize()
      .relativize(path.toAbsolutePath)
      .toString

  // Returns the url recorded for a previous download of `localFile`, if any (non-empty).
  private def previousUrlForLocalFile(localFile: File): Option[String] = {
    Option(storageInfoFileFor(localFile))
      .filter(Files.exists(_))
      .map(Files.readString)
      .filter(_.nonEmpty)
  }

  // Marker file (under LocalStorageDir) mirroring localFile's project-relative path.
  private def storageInfoFileFor(localFile: File): Path =
    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
}
38 changes: 38 additions & 0 deletions runBenchmarks.sc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import scala.sys.process.*
import java.nio.file.Files
import java.nio.file.Path
import scala.jdk.CollectionConverters.*

/** Drives the JMH benchmarks over every `defects4j` JAR in the local workspace,
  * one sbt invocation per (driver, project) pair. Results and textual output are
  * written under `results/`.
  */
@main def main(): Unit = {
  println("[info] Ensuring compilation status and benchmark dataset availability...")
  "sbt compile benchmarkDownloadTask".!

  val datasetDir = Path.of("workspace", "defects4j")
  val resultsDir = Path.of("results")

  // createDirectories also creates missing parents and is a no-op when present,
  // unlike createDirectory which throws if `results` has no parent or exists.
  Files.createDirectories(resultsDir)

  /** Builds the sbt command line for one benchmark run of `driver` on `project`
    * (a JAR file name); result/output file names are derived from both.
    */
  def benchmarkArgs(driver: String, project: String): String = {
    val projectDir  = datasetDir.resolve(project)
    val projectName = project.toLowerCase.stripSuffix(".jar")
    val resultsPath = resultsDir.resolve(s"results-$driver-$projectName")
    val outputPath  = resultsDir.resolve(s"output-$driver-$projectName")
    s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath}"
  }

  println("[info] Available projects:")
  // Files.list holds an open directory handle; close it to avoid a resource leak.
  val dirStream = Files.list(datasetDir)
  val projects =
    try dirStream.filter(_.toString.endsWith(".jar")).toList.asScala.toList
    finally dirStream.close()
  projects.foreach(p => println(s" - ${p.getFileName}"))

  println("[info] Available drivers:")
  val drivers = Seq("overflowdb")
  drivers.foreach(d => println(s" - $d"))

  drivers.foreach { driver =>
    projects.foreach { project =>
      val projectFile = project.getFileName.toString
      val cmd         = benchmarkArgs(driver, projectFile)
      // Log the bare file name rather than the full path for readability.
      println(s"[info] Benchmarking '$driver' on project '$projectFile'")
      s"sbt \"$cmd\"".!
    }
  }
}
178 changes: 67 additions & 111 deletions src/main/scala/com/github/plume/oss/Benchmark.scala
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
package com.github.plume.oss

import com.github.plume.oss.Benchmark.BenchmarkType.WRITE
import com.github.plume.oss.drivers.IDriver
import io.joern.jimple2cpg.Config
import better.files.File
import com.github.plume.oss.Benchmark.BenchmarkType.*
import com.github.plume.oss.benchmarking.{
GraphWriteBenchmark,
Neo4jEmbedReadBenchmark,
OverflowDbReadBenchmark,
TinkerGraphReadBenchmark
}
import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler
import org.openjdk.jmh.annotations.{Benchmark, Level, Mode, Param, Scope, Setup, State, TearDown}
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue}
import upickle.default.*

import java.util
import java.util.concurrent.TimeUnit
import scala.compiletime.uninitialized

object Benchmark {

Expand All @@ -28,13 +33,36 @@ object Benchmark {
s"Finished WRITE JMH benchmarks. Results: ${config.jmhResultFile}-WRITE.csv; Output: ${config.jmhOutputFile}-WRITE.csv"
)

// val readOptsBenchmark = createOptionsBoilerPlate(config, READ)
// .include(classOf[OverflowDbBenchmark].getSimpleName)
// .build()
// new Runner(readOptsBenchmark).run()
// println(
// s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
// )
val readOptsBenchmark = config.dbConfig match {
case _: TinkerGraphConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[TinkerGraphReadBenchmark].getSimpleName)
.build()
)
case _: OverflowDbConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[OverflowDbReadBenchmark].getSimpleName)
.build()
)
case _: Neo4jEmbeddedConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[Neo4jEmbedReadBenchmark].getSimpleName)
.build()
)
case x =>
println(s"Read benchmarks are not available for ${x.getClass.getSimpleName}, skipping...")
Option.empty
}
readOptsBenchmark.foreach { opts =>
new Runner(opts).run()
println(
s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
)
}

}
}

Expand All @@ -58,105 +86,33 @@ object Benchmark {
case READ, WRITE
}

}

/** JMH benchmark measuring the cost of writing an AST into a graph database
  * driver. The driver under test is chosen via the serialized [[PlumeConfig]]
  * injected through the `configStr` JMH parameter.
  */
@State(Scope.Benchmark)
class GraphWriteBenchmark {

  // Serialized PlumeConfig (uPickle); injected by the JMH runner, blank by default.
  @Param(Array(""))
  var configStr: String = ""
  // Parsed eagerly as a fallback; re-parsed in setupBenchmark once JMH has injected configStr.
  var config: PlumeConfig =
    if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
  var driver: IDriver = uninitialized

  // Runs once per benchmark: parse the (now injected) config and open the driver under test.
  @Setup
  def setupBenchmark(params: BenchmarkParams): Unit = {
    config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
    driver = config.dbConfig.toDriver
  }

  // Start every iteration from an empty database so successive writes are comparable.
  @Setup(Level.Iteration)
  def clearDriver(params: BenchmarkParams): Unit = {
    driver.clear()
  }

  /** Measured operation: build the AST for the configured input and store it via the driver.
    * The driver is fed to the Blackhole to keep dead-code elimination from removing the work.
    */
  @Benchmark
  def createAst(blackhole: Blackhole): Unit = {
    JimpleAst2Database(driver).createAst(Config().withInputPath(config.inputDir))
    Option(blackhole).foreach(_.consume(driver))
  }

  // Release database resources once the benchmark completes.
  @TearDown
  def cleanupBenchmark(): Unit = {
    driver.clear()
    driver.close()
  }

}

sealed trait GraphReadBenchmark[D <: IDriver](protected val driver: D) {

private var nodeStart: Array[Long] = new Array[Long](0)
private var fullNames: Array[String] = uninitialized
def initializeDriverAndInputDir(configStr: String, useCachedGraph: Boolean): (IDriver, PlumeConfig) = {
val config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
if (!useCachedGraph) {
config.dbConfig match {
case OverflowDbConfig(storageLocation, _, _) if !useCachedGraph =>
File(storageLocation).delete(swallowIOExceptions = true)
case TinkerGraphConfig(Some(importPath), _) if !useCachedGraph =>
File(importPath).delete(swallowIOExceptions = true)
case Neo4jEmbeddedConfig(_, databaseDir, _) if !useCachedGraph =>
File(databaseDir).delete(swallowIOExceptions = true)
case _ =>
}
}

val driver = if (useCachedGraph) {
config.dbConfig match {
case TinkerGraphConfig(Some(importPath), _) if File(importPath).exists =>
val driver = config.dbConfig.toDriver.asInstanceOf[TinkerGraphDriver]
driver.importGraph(importPath)
driver
case _ => config.dbConfig.toDriver
}
} else {
config.dbConfig.toDriver
}

@Setup
def setupFun(params: BenchmarkParams): Unit = {
params.getBenchmark
driver -> config
}

@Benchmark
def astDFS(blackhole: Blackhole): Int

@Benchmark
def astUp(blackhole: Blackhole): Int

@Benchmark
def orderSumChecked(blackhole: Blackhole): Int

@Benchmark
def orderSumUnchecked(blackhole: Blackhole): Int

@Benchmark
def orderSumExplicit(blackhole: Blackhole): Int

@Benchmark
def callOrderTrav(blackhole: Blackhole): Int

@Benchmark
def callOrderExplicit(blackhole: Blackhole): Int

@Benchmark
def indexedMethodFullName(bh: Blackhole): Unit

@Benchmark
def unindexedMethodFullName(bh: Blackhole): Unit

}

//@State(Scope.Benchmark)
//class OverflowDbBenchmark(config: OverflowDbConfig)
// extends GraphReadBenchmark(
// ) {
//
// override def createAst(blackhole: Blackhole): Int = {
// 0
// }
//
// override def astDFS(blackhole: Blackhole): Int = ???
//
// override def astUp(blackhole: Blackhole): Int = ???
//
// override def orderSumChecked(blackhole: Blackhole): Int = ???
//
// override def orderSumUnchecked(blackhole: Blackhole): Int = ???
//
// override def orderSumExplicit(blackhole: Blackhole): Int = ???
//
// override def callOrderTrav(blackhole: Blackhole): Int = ???
//
// override def callOrderExplicit(blackhole: Blackhole): Int = ???
//
// override def indexedMethodFullName(bh: Blackhole): Unit = ???
//
// override def unindexedMethodFullName(bh: Blackhole): Unit = ???
//}
Loading
Loading