diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..320b5bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,45 @@ +*.class +*.log + +# sbt specific +dist/* +target/ +lib_managed/ +src_managed/ +project/boot/ +project/plugins/project/ + +# Scala-IDE specific +.scala_dependencies +.cache + +*.owl + +.classpath + +.project + +.classpath + +.project + +.DS_Store + +*.nt + +.settings/org.scala-ide.sdt.core.prefs + +*.binary + +*.txt + +*.zip +binary-split/0.filtered-split + +*.snapshot + +*.filtered-split + +*.config + +*.split diff --git a/build.sbt b/build.sbt new file mode 100755 index 0000000..7ee602f --- /dev/null +++ b/build.sbt @@ -0,0 +1,31 @@ +import AssemblyKeys._ +assemblySettings + +/** Project */ +name := "triplerush-evaluation" + +version := "1.0-SNAPSHOT" + +organization := "com.signalcollect" + +scalaVersion := "2.10.3" + +scalacOptions ++= Seq("-optimize", "-Yinline-warnings", "-feature", "-deprecation") + +EclipseKeys.createSrc := EclipseCreateSrc.Default + EclipseCreateSrc.Resource + +EclipseKeys.withSource := true + +test in assembly := {} + +parallelExecution in Test := false + +excludedJars in assembly <<= (fullClasspath in assembly) map { cp => + cp filter {_.data.getName == "minlog-1.2.jar"} +} + +/** Dependencies */ +libraryDependencies ++= Seq( + "org.scala-lang" % "scala-library" % "2.10.3" % "compile", + "com.google.collections" % "google-collections" % "1.0" + ) diff --git a/lib/core-1.0.jar b/lib/core-1.0.jar new file mode 100644 index 0000000..156df91 Binary files /dev/null and b/lib/core-1.0.jar differ diff --git a/lib/spreadsheet-3.0.jar b/lib/spreadsheet-3.0.jar new file mode 100644 index 0000000..2e847d7 Binary files /dev/null and b/lib/spreadsheet-3.0.jar differ diff --git a/project/Build.scala b/project/Build.scala new file mode 100644 index 0000000..250e4a0 --- /dev/null +++ b/project/Build.scala @@ -0,0 +1,8 @@ +import sbt._ +import Keys._ + +object GraphsBuild extends Build { + lazy val triplerush = ProjectRef(file("../triplerush"), id = "triplerush") + val scTriplerush = Project(id = "triplerush-evaluation", + base = file(".")) dependsOn (triplerush) +} diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100644 index 0000000..16e3f66 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,5 @@ +resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns) + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.0") + +addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.4.0") diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/DbpsbBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/DbpsbBenchmark.scala new file mode 100755 index 0000000..41e655c --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/DbpsbBenchmark.scala @@ -0,0 +1,388 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.File +//import java.util.Date +//import java.util.concurrent.TimeUnit +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import scala.concurrent.duration._ +//import scala.io.Source +//import scala.util.Random +//import com.signalcollect.GraphBuilder +//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory +//import com.signalcollect.nodeprovisioning.torque.LocalHost +//import com.signalcollect.nodeprovisioning.torque.TorqueHost +//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +//import com.signalcollect.nodeprovisioning.torque.TorquePriority +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.TripleRush +//import com.signalcollect.triplerush.vertices.QueryOptimizer +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.Mapping +//import akka.event.Logging +//import com.signalcollect.triplerush.QuerySpecification +//import scala.collection.mutable.UnrolledBuffer +//import java.lang.management.ManagementFactory +//import collection.JavaConversions._ +//import language.postfixOps +// +///** +// * Runs a PageRank algorithm on a graph of a fixed size +// * for different numbers of worker threads. +// * +// * Evaluation is set to execute on a 'Kraken'-node. +// */ +//object DbpsbBenchmark extends App { +// def jvmParameters = " -Xmx31000m" + +// " -Xms31000m" + +// " -XX:+AggressiveOpts" + +// " -XX:+AlwaysPreTouch" + +// " -XX:+UseNUMA" + +// " -XX:-UseBiasedLocking" + +// " -XX:MaxInlineSize=1024" +// +// def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" +// val assemblyFile = new File(assemblyPath) +// val kraken = new TorqueHost( +// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// localJarPath = assemblyPath, jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = TorquePriority.fast) +// val localHost = new LocalHost +// val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data") +// +// def getRevision: String = { +// try { +// val gitLogPath = ".git/logs/HEAD" +// val gitLog = new File(gitLogPath) +// val lines = Source.fromFile(gitLogPath).getLines +// val lastLine = lines.toList.last +// val revision = lastLine.split(" ")(1) +// revision +// } catch { +// case t: Throwable => "Unknown revision." +// } +// } +// +// /*********/ +// def evalName = s"DBPSB Evaluation." 
+// def runs = 10 +// var evaluation = new Evaluation(executionHost = kraken).addResultHandler(googleDocs) +// /*********/ +// +// for (run <- 1 to runs) { +// for (optimizer <- List(QueryOptimizer.Clever)) { +// evaluation = evaluation.addEvaluationRun(dbpsbBenchmarkRun( +// evalName, +// false, +// Long.MaxValue, +// optimizer, +// getRevision)) +// } +// } +// evaluation.execute +// +// def dbpsbBenchmarkRun( +// description: String, +// sampling: Boolean, +// tickets: Long, +// optimizer: Int, +// revision: String)(): List[Map[String, String]] = { +// +// /** +// * Queries from: Trinity.RDF +// * +// * Times Trinity: 7 220 5 7 8 21 13 28 +// */ +// val game = -1 +// val title = -2 +// +// val var3 = -1 +// val var2 = -2 +// val var1 = -3 +// +// val musician = -1 +// val name = -2 +// val vdescription = -3 +// +// val person = -1 +// val birth = -2 +// val pname = -3 +// val death = -4 +// +// val car = -1 +// val man = -3 +// val manufacturer = -4 +// +// val bvar6 = -1 +// val bvar = -2 +// val bvar0 = -3 +// val bvar1 = -4 +// val bvar2 = -5 +// val bvar3 = -6 +// +// val s = -1 +// val player = -2 +// val position = -3 +// val club = -4 +// val cap = -5 +// val place = -6 +// val pop = -7 +// val tricot = -8 +// +// val m = Map( +// ("http://www.w3.org/2004/02/skos/core#subject", 1), +// ("http://dbpedia.org/resource/Category:First-person_shooters", 47406), +// ("foaf:name", 41), +// ("foaf:homepage", 653), +// ("rdf#type", 16), +// ("http://dbpedia.org/resource/Category:German_musicians", 187543), +// ("rdfs#comment", 27), +// ("dbo:birthPlace", 1132), +// ("http://dbpedia.org/resource/Berlin", 19706), +// ("dbo:birthDate", 436), +// ("dbo:deathDate", 1177), +// ("http://dbpedia.org/resource/Category:Luxury_vehicles", 322352), +// ("dbo:manufacturer", 11736), +// ("dbprop:name", 30), +// ("dbprop:pages", 37409), +// ("dbprop:isbn", 3385), +// ("dbprop:author", 3371), +// ("foaf:page", 39), +// ("dbo:SoccerPlayer", 1723), +// ("dbprop:position", 397), +// ("dbprop:clubs", 1709), +// ("dbo:capacity", 6306), +// ("dbprop:population", 966), +// ("dbo:number", 411)) +// +// /** +// * Queries from Trinity.RDF paper +// * +// */ +// def fullQueries: List[QuerySpecification] = List( +// QuerySpecification(List( +// TriplePattern(game, m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:First-person_shooters")), //?game . +// TriplePattern(game, m("foaf:name"), title)) //?game foaf:name ?title . +// ), +// QuerySpecification(List( +// TriplePattern(var3, m("foaf:homepage"), var2), //?var3 ?var2 . +// TriplePattern(var3, m("rdf#type"), var1)) //?var3 ?var +// ), +// QuerySpecification(List( +// TriplePattern(musician, m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:German_musicians")), //?musician . +// TriplePattern(musician, m("foaf:name"), name), //?musician foaf:name ?name . 
+// TriplePattern(musician, m("rdfs#comment"), vdescription)) //?musician rdfs:comment ?description +// ), +// QuerySpecification(List( +// TriplePattern(person, m("dbo:birthPlace"), m("http://dbpedia.org/resource/Berlin")), +// TriplePattern(person, m("dbo:birthDate"), birth), +// TriplePattern(person, m("foaf:name"), pname), +// TriplePattern(person, m("dbo:deathDate"), death))), +// QuerySpecification(List( +// TriplePattern(car, m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:Luxury_vehicles")), +// TriplePattern(car, m("foaf:name"), name), +// TriplePattern(car, m("dbo:manufacturer"), man), +// TriplePattern(man, m("foaf:name"), manufacturer))), +// QuerySpecification(List( +// TriplePattern(bvar6, m("rdf#type"), bvar), +// TriplePattern(bvar6, m("dbprop:name"), bvar0), +// TriplePattern(bvar6, m("dbprop:pages"), bvar1), +// TriplePattern(bvar6, m("dbprop:isbn"), bvar2), +// TriplePattern(bvar6, m("dbprop:author"), bvar3))), +// QuerySpecification(List( +// TriplePattern(bvar6, m("rdf#type"), bvar), +// TriplePattern(bvar6, m("dbprop:name"), bvar0), +// TriplePattern(bvar6, m("dbprop:pages"), bvar1), +// TriplePattern(bvar6, m("dbprop:isbn"), bvar2), +// TriplePattern(bvar6, m("dbprop:author"), bvar3))), +// QuerySpecification(List( +// TriplePattern(s, m("foaf:page"), player), +// TriplePattern(s, m("rdf#type"), m("dbo:SoccerPlayer")), +// TriplePattern(s, m("dbprop:position"), position), +// TriplePattern(s, m("dbprop:clubs"), club), +// TriplePattern(club, m("dbo:capacity"), cap), +// TriplePattern(s, m("dbo:birthPlace"), place), +// TriplePattern(place, m("dbprop:population"), pop), +// TriplePattern(s, m("dbo:number"), tricot)))) +// val queries = { +// require(!sampling && tickets == Long.MaxValue) +// fullQueries +// } +// +// var baseResults = Map[String, String]() +// val qe = new TripleRush() +// +// def loadDbpsb { +// val dbpsbFolderName = s"dbpsb10-filtered-splits" +// for (splitId <- 0 until 2880) { +// qe.loadBinary(s"./$dbpsbFolderName/$splitId.filtered-split", Some(splitId)) +// if (splitId % 288 == 279) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading...") +// } +// } +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. +// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def jitRepetitions = 100 +// +// /** +// * Go to JVM JIT steady state by executing the queries multiple times. 
+// */ +// def jitSteadyState { +// for (i <- 1 to jitRepetitions) { +// for (queryId <- 1 to 8) { +// val queryIndex = queryId - 1 +// val query = fullQueries(queryIndex) +// print(s"Warming up with query $query ...") +// qe.executeQuery(query.toParticle) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// lazy val gcs = ManagementFactory.getGarbageCollectorMXBeans +// +// def getGcCollectionTime: Long = { +// gcs map (_.getCollectionTime) sum +// } +// +// def getGcCollectionCount: Long = { +// gcs map (_.getCollectionCount) sum +// } +// +// lazy val compilations = ManagementFactory.getCompilationMXBean +// +// lazy val javaVersion = ManagementFactory.getRuntimeMXBean.getVmVersion +// +// lazy val jvmLibraryPath = ManagementFactory.getRuntimeMXBean.getLibraryPath +// +// lazy val jvmArguments = ManagementFactory.getRuntimeMXBean.getInputArguments +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.runFinalization +// System.gc +// Thread.sleep(10000) +// } +// Thread.sleep(120000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val particle = query.toParticle +// val gcTimeBefore = getGcCollectionTime +// val gcCountBefore = getGcCollectionCount +// val compileTimeBefore = compilations.getTotalCompilationTime +// runResult += ((s"totalMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) +// runResult += ((s"freeMemoryBefore", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"usedMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) +// val startTime = System.nanoTime +// val (queryResultFuture, queryStatsFuture) = qe.executeAdvancedQuery(particle) +// val queryResult = Await.result(queryResultFuture, 7200 seconds) +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val gcTimeAfter = getGcCollectionTime +// val gcCountAfter = getGcCollectionCount +// val gcTimeDuringQuery = gcTimeAfter - gcTimeBefore +// val gcCountDuringQuery = gcCountAfter - gcCountBefore +// val compileTimeAfter = compilations.getTotalCompilationTime +// val compileTimeDuringQuery = compileTimeAfter - compileTimeBefore +// val queryStats = Await.result(queryStatsFuture, 10 seconds) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// runResult += ((s"revision", revision)) +// runResult += ((s"queryId", queryId.toString)) +// runResult += ((s"optimizer", optimizer.toString)) +// runResult += ((s"queryCopyCount", queryStats("queryCopyCount").toString)) +// runResult += ((s"query", queryStats("optimizedQuery").toString)) +// runResult += ((s"exception", queryStats("exception").toString)) +// runResult += ((s"results", queryResult.size.toString)) +// runResult += ((s"executionTime", executionTime.toString)) +// runResult += ((s"optimizingTime", optimizingTime.toString)) +// runResult += ((s"totalMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) +// runResult += ((s"freeMemory", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"usedMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"executionHostname", java.net.InetAddress.getLocalHost.getHostName)) +// runResult += 
(("gcTimeAfter", gcTimeAfter.toString)) +// runResult += (("gcCountAfter", gcCountAfter.toString)) +// runResult += (("gcTimeDuringQuery", gcTimeDuringQuery.toString)) +// runResult += (("gcCountDuringQuery", gcCountDuringQuery.toString)) +// runResult += (("compileTimeAfter", compileTimeAfter.toString)) +// runResult += (("compileTimeDuringQuery", compileTimeDuringQuery.toString)) +// runResult += ((s"loadNumber", 10.toString)) +// runResult += ((s"date", date.toString)) +// runResult += ((s"dataSet", s"dbpsb10")) +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += (("evaluationDescription", description)) +// baseResults += (("jitRepetitions", jitRepetitions.toString)) +// baseResults += (("java.runtime.version", System.getProperty("java.runtime.version"))) +// baseResults += (("javaVmVersion", javaVersion)) +// baseResults += (("jvmLibraryPath", jvmLibraryPath)) +// baseResults += (("jvmArguments", jvmArguments.mkString(" "))) +// +// val loadingTime = measureTime { +// println("Dispatching loading command to worker...") +// loadDbpsb +// qe.awaitIdle +// } +// baseResults += (("loadingTime", loadingTime.toString)) +// +// println("Starting warm-up...") +// jitSteadyState +// cleanGarbage +// println(s"Finished warm-up.") +// for (queryId <- 1 to 8) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// qe.shutdown +// finalResults +// } +// +//} diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/Evaluation.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/Evaluation.scala new file mode 100644 index 0000000..4a61906 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/Evaluation.scala @@ -0,0 +1,49 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.signalcollect.triplerush.evaluation + +import com.signalcollect.nodeprovisioning.torque._ +import com.signalcollect.util.RandomString + +case class Evaluation( + executionHost: ExecutionHost = new LocalHost, + evaluationRuns: List[() => List[Map[String, String]]] = List(), + resultHandlers: List[Map[String, String] => Unit] = List(println(_)), + extraStats: Map[String, String] = Map()) { + def addEvaluationRun(evaluationRun: () => List[Map[String, String]]) = Evaluation(executionHost, evaluationRun :: evaluationRuns, resultHandlers, extraStats) + def addResultHandler(resultHandler: Map[String, String] => Unit) = Evaluation(executionHost, evaluationRuns, resultHandler :: resultHandlers, extraStats) + def addExtraStats(stats: Map[String, String]) = Evaluation(executionHost, evaluationRuns, resultHandlers, extraStats ++ stats) + def execute { + val jobs = evaluationRuns map { evaluationRun => + val jobId = s"node-0-${RandomString.generate(6)}" + val jobFunction = () => { + println(s"Job $jobId is being executed ...") + val stats = evaluationRun() // Execute evaluation. + for (stat <- stats) { + resultHandlers foreach (handler => handler(stat ++ extraStats ++ Map("jobId" -> jobId.toString))) + } + println("Done.") + } + Job(jobFunction, jobId) + } + executionHost.executeJobs(jobs) + } +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/GoogleDocsResultHandler.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/GoogleDocsResultHandler.scala new file mode 100644 index 0000000..c8f9b81 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/GoogleDocsResultHandler.scala @@ -0,0 +1,126 @@ +/* + * @author Daniel Strebel + * @author Philip Stutz + * + * Copyright 2012 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.signalcollect.triplerush.evaluation + +import java.net.URL +import scala.collection.JavaConversions._ +import com.google.gdata.client.spreadsheet._ +import com.google.gdata.data._ +import com.google.gdata.data.spreadsheet._ +import com.signalcollect.nodeprovisioning.torque._ +import com.google.gdata.util.InvalidEntryException + +class GoogleDocsResultHandler(username: String, password: String, spreadsheetName: String, worksheetName: String) + extends Function1[Map[String, String], Unit] + with Serializable { + + def apply(data: Map[String, String]) = { + val service: SpreadsheetService = actionWithExponentialRetry[SpreadsheetService](() => new SpreadsheetService("uzh-signalcollect-2.0.0")) + actionWithExponentialRetry(() => service.setUserCredentials(username, password)) + val spreadsheet = actionWithExponentialRetry(() => getSpreadsheet(spreadsheetName, service)) + val worksheet = actionWithExponentialRetry(() => getWorksheetInSpreadsheet(worksheetName, spreadsheet)) + actionWithExponentialRetry(() => insertRow(worksheet, data, service)) + } + + def getWorksheetInSpreadsheet(title: String, spreadsheet: SpreadsheetEntry): WorksheetEntry = { + var result: WorksheetEntry = null.asInstanceOf[WorksheetEntry] + val worksheets = actionWithExponentialRetry(() => spreadsheet.getWorksheets) + for (worksheet <- worksheets) { + val currentWorksheetTitle = actionWithExponentialRetry(() => worksheet.getTitle.getPlainText) + if (currentWorksheetTitle == title) { + result = worksheet + } + } + if (result == null) { + throw new Exception("Worksheet with title \"" + title + "\" not found within spreadsheet " + spreadsheet.getTitle.getPlainText + ".") + } + result + } + + def getSpreadsheet(title: String, service: SpreadsheetService): SpreadsheetEntry = { + var result: SpreadsheetEntry = null.asInstanceOf[SpreadsheetEntry] + val spreadsheetFeedUrl = new URL("https://spreadsheets.google.com/feeds/spreadsheets/private/full") + val spreadsheetFeed = actionWithExponentialRetry(() => service.getFeed(spreadsheetFeedUrl, classOf[SpreadsheetFeed])) + val spreadsheets = actionWithExponentialRetry(() => spreadsheetFeed.getEntries) + for (spreadsheet <- spreadsheets) { + val currentSpreadsheetTitle = actionWithExponentialRetry(() => spreadsheet.getTitle.getPlainText) + if (currentSpreadsheetTitle == title) { + result = spreadsheet + } + } + if (result == null) { + throw new Exception("Spreadsheet with title \"" + title + "\" not found.") + } + result + } + + def insertRow(worksheet: WorksheetEntry, dataMap: Map[String, String], service: SpreadsheetService) { + val newEntry = new ListEntry + val elem = actionWithExponentialRetry(() => newEntry.getCustomElements) + for (dataTuple <- dataMap) { + actionWithExponentialRetry(() => elem.setValueLocal(dataTuple._1, dataTuple._2)) + } + actionWithExponentialRetry(() => service.insert(worksheet.getListFeedUrl, newEntry)) + } + + def actionWithExponentialRetry[G](action: () => G): G = { + try { + action() + } catch { + case i: InvalidEntryException => null.asInstanceOf[G] // ignore, they make no sense and the entry is still successfully written + case e: Exception => + // just retry a few times + try { + println("Spreadsheet API exception: " + e) + println("Spreadsheet API retry in 1 second") + Thread.sleep(1000) + println("Retrying.") + action() + } catch { + case i: InvalidEntryException => null.asInstanceOf[G] // ignore, they make no sense and the entry is still successfully written + case e: Exception => + try { + println("Spreadsheet API exception: 
" + e) + println("Spreadsheet API retry in 10 seconds") + Thread.sleep(10000) + println("Retrying.") + action() + } catch { + case i: InvalidEntryException => null.asInstanceOf[G] // ignore, they make no sense and the entry is still successfully written + case e: Exception => + try { + println("Spreadsheet API exception: " + e) + println("Spreadsheet API retry in 100 seconds") + Thread.sleep(100000) + println("Retrying.") + action() + } catch { + case i: InvalidEntryException => null.asInstanceOf[G] // ignore, they make no sense and the entry is still successfully written + case e: Exception => + println("Google API did not acknowledge write: " + e) + null.asInstanceOf[G] + } + } + } + } + } + +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/LocalDbpsbProfiling.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/LocalDbpsbProfiling.scala new file mode 100644 index 0000000..7d83cdb --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/LocalDbpsbProfiling.scala @@ -0,0 +1,365 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.File +//import java.util.Date +//import java.util.concurrent.TimeUnit +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import scala.io.Source +//import scala.util.Random +//import com.signalcollect.GraphBuilder +//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory +//import com.signalcollect.nodeprovisioning.torque.LocalHost +//import com.signalcollect.nodeprovisioning.torque.TorqueHost +//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +//import com.signalcollect.nodeprovisioning.torque.TorquePriority +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.QueryEngine +//import com.signalcollect.triplerush.vertices.QueryOptimizer +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.Mapping +//import akka.event.Logging +//import com.signalcollect.triplerush.vertices.QueryResult +//import com.signalcollect.triplerush.QuerySpecification +//import scala.collection.mutable.UnrolledBuffer +// +///** +// * Local profiling of DBPSB benchmark on part of data. 
+// */ +//object LocalDbpsbProfiling extends App { +// def jvmHighThroughputGc = " -Xmx3000m" + +// " -Xms3000m" + +// " -Xmn400m" + +// " -d64" + +// " -XX:+UnlockExperimentalVMOptions" + +// " -XX:+UseConcMarkSweepGC" + +// " -XX:+UseParNewGC" + +// " -XX:+CMSIncrementalPacing" + +// " -XX:+CMSIncrementalMode" + +// " -XX:ParallelGCThreads=20" + +// " -XX:ParallelCMSThreads=20" + +// " -XX:-PrintCompilation" + +// " -XX:-PrintGC" + +// " -Dsun.io.serialization.extendedDebugInfo=true" + +// " -XX:MaxInlineSize=1024" +// +// def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" +// val assemblyFile = new File(assemblyPath) +// val localHost = new LocalHost +// // val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data") +// +// def getRevision: String = { +// try { +// val gitLogPath = ".git/logs/HEAD" +// val gitLog = new File(gitLogPath) +// val lines = Source.fromFile(gitLogPath).getLines +// val lastLine = lines.toList.last +// val revision = lastLine.split(" ")(1) +// revision +// } catch { +// case t: Throwable => "Unknown revision." +// } +// } +// +// /*********/ +// def evalName = s"DBPSB Profiling" +// def runs = 1 +// var evaluation = new Evaluation(evaluationName = evalName, executionHost = localHost) //.addResultHandler(googleDocs) +// /*********/ +// +// for (run <- 1 to runs) { +// for (optimizer <- List(QueryOptimizer.Clever)) { +// evaluation = evaluation.addEvaluationRun(dbpsbBenchmarkRun( +// evalName, +// false, +// Long.MaxValue, +// optimizer, +// getRevision)) +// } +// } +// evaluation.execute +// +// def dbpsbBenchmarkRun( +// description: String, +// sampling: Boolean, +// tickets: Long, +// optimizer: Int, +// revision: String)(): List[Map[String, String]] = { +// +// /** +// * Queries from: Trinity.RDF +// * +// * Times Trinity: 7 220 5 7 8 21 13 28 +// * Times TripleR: +// */ +// val game = -1 +// val title = -2 +// +// val var3 = -1 +// val var2 = -2 +// val var1 = -3 +// +// val musician = -1 +// val name = -2 +// val vdescription = -3 +// +// val person = -1 +// val birth = -2 +// val pname = -3 +// val death = -4 +// +// val car = -1 +// val man = -3 +// val manufacturer = -4 +// +// val bvar6 = -1 +// val bvar = -2 +// val bvar0 = -3 +// val bvar1 = -4 +// val bvar2 = -5 +// val bvar3 = -6 +// +// val s = -1 +// val player = -2 +// val position = -3 +// val club = -4 +// val cap = -5 +// val place = -6 +// val pop = -7 +// val tricot = -8 +// +// val m = Map( +// "http://www.w3.org/2004/02/skos/core#subject" -> 1, +// "http://dbpedia.org/resource/Category:First-person_shooters" -> 47406, +// "foaf:name" -> 41, +// "foaf:homepage" -> 653, +// "rdf#type" -> 16, +// "http://dbpedia.org/resource/Category:German_musicians" -> 187543, +// "rdfs#comment" -> 27, +// "dbo:birthPlace" -> 1132, +// "http://dbpedia.org/resource/Berlin" -> 19706, +// "dbo:birthDate" -> 436, +// "dbo:deathDate" -> 1177, +// "http://dbpedia.org/resource/Category:Luxury_vehicles" -> 322352, +// "dbo:manufacturer" -> 11736, +// "dbprop:name" -> 30, +// "dbprop:pages" -> 37409, +// "dbprop:isbn" -> 3385, +// "dbprop:author" -> 3371, +// "foaf:page" -> 39, +// "dbo:SoccerPlayer" -> 1723, +// "dbprop:position" -> 397, +// "dbprop:clubs" -> 1709, +// "dbo:capacity" -> 6306, +// "dbprop:population" -> 966, +// "dbo:number" -> 411) +// +// /** +// * Queries from Trinity.RDF paper +// * +// */ +// def fullQueries: List[QuerySpecification] = List( +// QuerySpecification(1, Array( +// TriplePattern(game, 
m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:First-person_shooters")), //?game . +// TriplePattern(game, m("foaf:name"), title)), //?game foaf:name ?title . +// new Array(2)), +// QuerySpecification(2, Array( +// TriplePattern(var3, m("foaf:homepage"), var2), //?var3 ?var2 . +// TriplePattern(var3, m("rdf#type"), var1)), //?var3 ?var +// new Array(3)), +// QuerySpecification(3, Array( +// TriplePattern(musician, m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:German_musicians")), //?musician . +// TriplePattern(musician, m("foaf:name"), name), //?musician foaf:name ?name . +// TriplePattern(musician, m("rdfs#comment"), vdescription)), //?musician rdfs:comment ?description +// new Array(3)), +// QuerySpecification(4, Array( +// TriplePattern(person, m("dbo:birthPlace"), m("http://dbpedia.org/resource/Berlin")), +// TriplePattern(person, m("dbo:birthDate"), birth), +// TriplePattern(person, m("foaf:name"), pname), +// TriplePattern(person, m("dbo:deathDate"), death)), +// new Array(4)), +// QuerySpecification(5, Array( +// TriplePattern(car, m("http://www.w3.org/2004/02/skos/core#subject"), m("http://dbpedia.org/resource/Category:Luxury_vehicles")), +// TriplePattern(car, m("foaf:name"), name), +// TriplePattern(car, m("dbo:manufacturer"), man), +// TriplePattern(man, m("foaf:name"), manufacturer)), +// new Array(4)), +// QuerySpecification(6, Array( +// TriplePattern(bvar6, m("rdf#type"), bvar), +// TriplePattern(bvar6, m("dbprop:name"), bvar0), +// TriplePattern(bvar6, m("dbprop:pages"), bvar1), +// TriplePattern(bvar6, m("dbprop:isbn"), bvar2), +// TriplePattern(bvar6, m("dbprop:author"), bvar3)), +// new Array(6)), +// QuerySpecification(7, Array( +// TriplePattern(bvar6, m("rdf#type"), bvar), +// TriplePattern(bvar6, m("dbprop:name"), bvar0), +// TriplePattern(bvar6, m("dbprop:pages"), bvar1), +// TriplePattern(bvar6, m("dbprop:isbn"), bvar2), +// TriplePattern(bvar6, m("dbprop:author"), bvar3)), +// new Array(6)), +// QuerySpecification(8, Array( +// TriplePattern(s, m("foaf:page"), player), +// TriplePattern(s, m("rdf#type"), m("dbo:SoccerPlayer")), +// TriplePattern(s, m("dbprop:position"), position), +// TriplePattern(s, m("dbprop:clubs"), club), +// TriplePattern(club, m("dbo:capacity"), cap), +// TriplePattern(s, m("dbo:birthPlace"), place), +// TriplePattern(place, m("dbprop:population"), pop), +// TriplePattern(s, m("dbo:number"), tricot)), +// new Array(8))) +// val queries = { +// require(!sampling && tickets == Long.MaxValue) +// fullQueries +// } +// +// var baseResults = Map[String, String]() +// val qe = new QueryEngine(GraphBuilder.withMessageBusFactory( +// new BulkAkkaMessageBusFactory(1024, false)). +// withMessageSerialization(false). +// withAkkaMessageCompression(true)) +// println(s"Local folder: ${(new File(".")).getAbsolutePath}") +// +// def loadDbpsb { +// val dbpsbFolderName = s"dbpsb10-filtered-splits" +// for (splitId <- 0 until 576) { //576 +// qe.loadBinary(s"./$dbpsbFolderName/$splitId.filtered-split", Some(splitId)) +// if (splitId % 288 == 287) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading...") +// } +// } +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. 
+// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def executeOnQueryEngine(q: QuerySpecification): QueryResult = { +// val resultFuture = qe.executeQuery(q, optimizer) +// try { +// Await.result(resultFuture, new FiniteDuration(1000, TimeUnit.SECONDS)) // TODO handle exception +// } catch { +// case t: Throwable => +// println(s"Query $q timed out!") +// QueryResult(UnrolledBuffer(), Array("exception"), Array(t)) +// } +// } +// +// /** +// * Go to JVM JIT steady state by executing the query 100 times. +// */ +// def jitSteadyState { +// for (i <- 1 to 100) { +// for (queryId <- 1 to 8) { +// val queryIndex = queryId - 1 +// val query = fullQueries(queryIndex) +// print(s"Warming up with query $query ...") +// executeOnQueryEngine(query) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.gc +// Thread.sleep(100) +// } +// Thread.sleep(10000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val startTime = System.nanoTime +// val queryResult = executeOnQueryEngine(query) +// val queryStats: Map[Any, Any] = (queryResult.statKeys zip queryResult.statVariables).toMap.withDefaultValue("") +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val timeToFirstResult = roundToMillisecondFraction(queryStats("firstResultNanoTime").asInstanceOf[Long] - startTime) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// runResult += s"revision" -> revision +// runResult += s"queryId" -> queryId.toString +// runResult += s"optimizer" -> optimizer.toString +// runResult += s"queryCopyCount" -> queryStats("queryCopyCount").toString +// runResult += s"query" -> queryStats("optimizedQuery").toString +// runResult += s"exception" -> queryStats("exception").toString +// runResult += s"results" -> queryResult.bindings.length.toString +// runResult += s"executionTime" -> executionTime.toString +// runResult += s"timeUntilFirstResult" -> timeToFirstResult.toString +// runResult += s"optimizingTime" -> optimizingTime.toString +// runResult += s"totalMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory).toString +// runResult += s"freeMemory" -> bytesToGigabytes(Runtime.getRuntime.freeMemory).toString +// runResult += s"usedMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString +// runResult += s"loadNumber" -> 10.toString +// runResult += s"date" -> date.toString +// runResult += s"dataSet" -> s"dbpsb10" +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += "evaluationDescription" -> description +// val loadingTime = measureTime { +// println("Dispatching loading command to worker...") +// loadDbpsb +// qe.awaitIdle +// } +// baseResults += "loadingTime" -> loadingTime.toString +// +// println("Starting warm-up...") +// jitSteadyState +// //cleanGarbage +// println(s"Finished warm-up.") +// println("Please connect 
profiler and press any key.") +// readLine +// for (queryId <- 1 to 8) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// +// qe.shutdown +// finalResults +// } +// +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/LubmBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/LubmBenchmark.scala new file mode 100644 index 0000000..fc91a0f --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/LubmBenchmark.scala @@ -0,0 +1,314 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.File +//import java.util.Date +//import java.util.concurrent.TimeUnit +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import scala.concurrent.duration._ +//import scala.io.Source +//import scala.util.Random +//import com.signalcollect.GraphBuilder +//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory +//import com.signalcollect.nodeprovisioning.torque.LocalHost +//import com.signalcollect.nodeprovisioning.torque.TorqueHost +//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +//import com.signalcollect.nodeprovisioning.torque.TorquePriority +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.QueryParticle +//import com.signalcollect.triplerush.QueryEngine +//import com.signalcollect.triplerush.vertices.QueryOptimizer +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.Mapping +//import akka.event.Logging +//import com.signalcollect.triplerush.QuerySpecification +//import scala.collection.mutable.UnrolledBuffer +//import java.lang.management.ManagementFactory +//import collection.JavaConversions._ +//import language.postfixOps +//import com.signalcollect.triplerush.TripleRush +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +// +//object LubmBenchmark extends App { +// def jvmParameters = " -Xmx31000m" + +// " -Xms31000m" + +// " -XX:+AggressiveOpts" + +// " -XX:+AlwaysPreTouch" + +// " -XX:+UseNUMA" + +// " -XX:-UseBiasedLocking" + +// " -XX:MaxInlineSize=1024" +// +// def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" +// val assemblyFile = new File(assemblyPath) +// val kraken = new TorqueHost( +// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// localJarPath = assemblyPath, jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = TorquePriority.fast) +// val localHost = new 
LocalHost +// val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data") +// +// def getRevision: String = { +// try { +// val gitLogPath = ".git/logs/HEAD" +// val gitLog = new File(gitLogPath) +// val lines = Source.fromFile(gitLogPath).getLines +// val lastLine = lines.toList.last +// val revision = lastLine.split(" ")(1) +// revision +// } catch { +// case t: Throwable => "Unknown revision." +// } +// } +// +// /*********/ +// def evalName = s"LUBM KRAKEN Rewired index experiment." +// def runs = 1 +// var evaluation = new Evaluation(evaluationName = evalName, executionHost = kraken).addResultHandler(googleDocs) +// // var evaluation = new Evaluation(evaluationName = evalName, executionHost = localHost).addResultHandler(googleDocs) +// /*********/ +// +// for (unis <- List(160)) { //10, 20, 40, 80, 160, 320, 480, 800 +// for (run <- 1 to runs) { +// for (optimizer <- List(QueryOptimizer.Clever)) { +// evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun( +// evalName, +// false, +// Long.MaxValue, +// optimizer, +// getRevision, +// unis)) +// } +// } +// } +// evaluation.execute +// +// def lubmBenchmarkRun( +// description: String, +// sampling: Boolean, +// tickets: Long, +// optimizer: Int, +// revision: String, +// universities: Int)(): List[Map[String, String]] = { +// +// val queries = LubmQueries.fullQueries +// +// var baseResults = Map[String, String]() +// val krakenFromKraken = new TorqueHost( +// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// localJarPath = "/home/user/stutz/triplerush-assembly-1.0-SNAPSHOT.jar", jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = TorquePriority.fast) +// // +// val numberOfNodes = 8 +// val graphBuilder = GraphBuilder. +// // withLoggingLevel(Logging.DebugLevel). +// withNodeProvisioner(new TorqueNodeProvisioner(krakenFromKraken, numberOfNodes, allocateWorkersOnCoordinatorNode = true, copyExecutable = false)) +// val qe = new TripleRush(graphBuilder) +// def loadLubm { +// val lubmFolderName = s"lubm$universities-filtered-splits" +// for (splitId <- 0 until 2880) { +// val splitFile = s"./$lubmFolderName/$splitId.filtered-split" +// qe.loadBinary(splitFile, Some(splitId)) +// if (splitId % 288 == 279) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading...") +// } +// } +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. +// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def jitRepetitions = 100 +// +// /** +// * Go to JVM JIT steady state by executing the queries multiple times. 
+// */ +// def jitSteadyState { +// for (i <- 1 to jitRepetitions) { +// for (queryId <- 1 to 7) { +// val queryIndex = queryId - 1 +// val query = queries(queryIndex).toParticle +// print(s"Warming up with query ${new QueryParticle(query).queryId} ...") +// qe.executeQuery(query) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// lazy val gcs = ManagementFactory.getGarbageCollectorMXBeans +// +// def getGcCollectionTime: Long = { +// gcs map (_.getCollectionTime) sum +// } +// +// def lastGcId: Long = { +// val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) +// val gcIds = sunGcs. +// map(_.getLastGcInfo). +// flatMap(info => if (info != null) Some(info.getId) else None) +// if (gcIds.isEmpty) 0 else gcIds.max +// } +// +// def freedDuringLastGc: Long = { +// val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) +// val usedBeforeLastGc = sunGcs. +// map(_.getLastGcInfo). +// map(_.getMemoryUsageBeforeGc). +// flatMap(_.values). +// map(_.getCommitted). +// sum +// val usedAfterLastGc = sunGcs. +// map(_.getLastGcInfo). +// map(_.getMemoryUsageAfterGc). +// flatMap(_.values). +// map(_.getCommitted). +// sum +// val freedDuringLastGc = usedBeforeLastGc - usedAfterLastGc +// freedDuringLastGc +// } +// +// def getGcCollectionCount: Long = { +// gcs map (_.getCollectionCount) sum +// } +// +// lazy val compilations = ManagementFactory.getCompilationMXBean +// +// lazy val javaVersion = ManagementFactory.getRuntimeMXBean.getVmVersion +// +// lazy val jvmLibraryPath = ManagementFactory.getRuntimeMXBean.getLibraryPath +// +// lazy val jvmArguments = ManagementFactory.getRuntimeMXBean.getInputArguments +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.runFinalization +// System.gc +// Thread.sleep(10000) +// } +// Thread.sleep(120000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val particle = query.toParticle +// val gcTimeBefore = getGcCollectionTime +// val gcCountBefore = getGcCollectionCount +// val compileTimeBefore = compilations.getTotalCompilationTime +// runResult += ((s"totalMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) +// runResult += ((s"freeMemoryBefore", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"usedMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) +// val startTime = System.nanoTime +// val (queryResultFuture, queryStatsFuture) = qe.executeAdvancedQuery(particle) +// val queryResult = Await.result(queryResultFuture, 7200 seconds) +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val gcTimeAfter = getGcCollectionTime +// val gcCountAfter = getGcCollectionCount +// val gcTimeDuringQuery = gcTimeAfter - gcTimeBefore +// val gcCountDuringQuery = gcCountAfter - gcCountBefore +// val compileTimeAfter = compilations.getTotalCompilationTime +// val compileTimeDuringQuery = compileTimeAfter - compileTimeBefore +// val queryStats = Await.result(queryStatsFuture, 10 seconds) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// runResult += ((s"revision", revision)) +// runResult += ((s"queryId", queryId.toString)) +// runResult += ((s"optimizer", optimizer.toString)) +// 
runResult += ((s"queryCopyCount", queryStats("queryCopyCount").toString)) +// runResult += ((s"query", queryStats("optimizedQuery").toString)) +// runResult += ((s"exception", queryStats("exception").toString)) +// runResult += ((s"results", queryResult.length.toString)) +// runResult += ((s"executionTime", executionTime.toString)) +// runResult += ((s"optimizingTime", optimizingTime.toString)) +// runResult += ((s"totalMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) +// runResult += ((s"freeMemory", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"usedMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) +// runResult += ((s"executionHostname", java.net.InetAddress.getLocalHost.getHostName)) +// runResult += (("gcTimeAfter", gcTimeAfter.toString)) +// runResult += (("gcCountAfter", gcCountAfter.toString)) +// runResult += (("gcTimeDuringQuery", gcTimeDuringQuery.toString)) +// runResult += (("gcCountDuringQuery", gcCountDuringQuery.toString)) +// runResult += (("compileTimeAfter", compileTimeAfter.toString)) +// runResult += (("compileTimeDuringQuery", compileTimeDuringQuery.toString)) +// runResult += s"loadNumber" -> universities.toString +// runResult += s"date" -> date.toString +// runResult += s"dataSet" -> s"lubm$universities" +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += (("evaluationDescription", description)) +// baseResults += (("jitRepetitions", jitRepetitions.toString)) +// baseResults += (("java.runtime.version", System.getProperty("java.runtime.version"))) +// baseResults += (("javaVmVersion", javaVersion)) +// baseResults += (("jvmLibraryPath", jvmLibraryPath)) +// baseResults += (("jvmArguments", jvmArguments.mkString(" "))) +// +// val loadingTime = measureTime { +// println("Dispatching loading command to workers...") +// loadLubm +// qe.prepareExecution +// } +// baseResults += (("loadingTime", loadingTime.toString)) +// +// val loadingTime = measureTime { +// println("Dispatching loading command to workers...") +// loadLubm +// qe.prepareExecution +// } +// +// runResult += s"loadNumber" -> universities.toString +// runResult += s"dataSet" -> s"lubm$universities" +// baseResults += (("loadingTime", loadingTime.toString)) +// +// println("Starting warm-up...") +// jitSteadyState(queries, tr, warmupRepetitions) +// cleanGarbage +// println(s"Finished warm-up.") +// for (queryId <- 1 to 7) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// tr.shutdown +// finalResults +// } +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/LubmQueries.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/LubmQueries.scala new file mode 100644 index 0000000..1d043a1 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/LubmQueries.scala @@ -0,0 +1,143 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation + +import com.signalcollect.triplerush.QuerySpecification +import com.signalcollect.triplerush.TriplePattern + +object QueryEncoding { + def apply(id: String) = m(id) + + def ub = "http://swat.cse.lehigh.edu/onto/univ-bench.owl" + def rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns" +//http://www.w3.org/1999/02/22-rdf-syntax-ns#type + def m = Map( + (s"$rdf#type", 1), + (s"$ub#GraduateStudent", 2), + (s"$ub#undergraduateDegreeFrom", 3), + (s"$ub#memberOf", 4), + (s"$ub#Department", 5), + (s"$ub#subOrganizationOf", 6), + (s"$ub#University", 7), + (s"$ub#Course", 8), + (s"$ub#name", 9), + (s"$ub#UndergraduateStudent", 10), + (s"$ub#worksFor", 11), + (s"http://www.Department0.University0.edu", 12), + (s"$ub#FullProfessor", 13), + (s"$ub#emailAddress", 14), + (s"$ub#telephone", 15), + (s"$ub#ResearchGroup", 16), + (s"http://www.University0.edu", 17), + (s"$ub#teacherOf", 18), + (s"$ub#advisor", 19), + (s"$ub#takesCourse", 20)) +// (s"http://www.w3.org/2004/02/skos/core#subject", 21), +// (s"http://dbpedia.org/resource/Category:First-person_shooters", 22), +// (s"$foaf:name", 23), +// (s"$foaf:homepage", 24), +// (s"rdf#type", 25), +// (s"http://dbpedia.org/resource/Category:German_musicians", 26), +// (s"$rdfs#comment", 27), +// (s"$dbo:birthPlace", 28), +// (s"http://dbpedia.org/resource/Berlin", 29), +// (s"$dbo:birthDate", 30), +// (s"$dbo:deathDate", 31), +// (s"http://dbpedia.org/resource/Category:Luxury_vehicles", 32), +// (s"$dbo:manufacturer", 33), +// (s"$dbprop:name", 34), +// (s"$dbprop:pages", 35), +// (s"$dbprop:isbn", 36), +// (s"$dbprop:author", 37), +// (s"$foaf:page", 38), +// (s"$dbo:SoccerPlayer", 39), +// (s"$dbprop:position", 40), +// (s"$dbprop:clubs", 41), +// (s"$dbo:capacity", 42), +// (s"$dbprop:population", 43), +// (s"$dbo:number", 44) +} + +object LubmQueries { + + /** + * Queries from: http://www.cs.rpi.edu/~zaki/PaperDir/WWW10.pdf + * Result sizes from: http://research.microsoft.com/pubs/183717/Trinity.RDF.pdf + * L1 L2 L3 L4 L5 L6 L7 + * LUBM-160 397 173040 0 10 10 125 7125 + * LUBM-10240 2502 11016920 0 10 10 125 450721 + * + * Times Trinity: 281 132 110 5 4 9 630 + */ + val x = -1 + val y = -2 + val z = -3 + + import QueryEncoding._ + + def fullQueries: List[QuerySpecification] = List( + QuerySpecification(List( + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#GraduateStudent")), // ?X rdf:type ub:GraduateStudent + TriplePattern(x, QueryEncoding(s"$ub#undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y + TriplePattern(x, QueryEncoding(s"$ub#memberOf"), z), // ?X ub:memberOf ?Z + TriplePattern(z, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#Department")), // ?Z rdf:type ub:Department + TriplePattern(z, QueryEncoding(s"$ub#subOrganizationOf"), y), // ?Z ub:subOrganizationOf ?Y + TriplePattern(y, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#University")) // ?Y rdf:type ub:University + )), + QuerySpecification(List( + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#Course")), // ?X rdf:type ub:Course + TriplePattern(x, 
QueryEncoding(s"$ub#name"), y) // ?X ub:name ?Y), + )), + QuerySpecification(List( + TriplePattern(x, QueryEncoding(s"$ub#undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#UndergraduateStudent")), // ?X rdf:type ub:UndergraduateStudent + TriplePattern(x, QueryEncoding(s"$ub#memberOf"), z), // ?X ub:memberOf ?Z + TriplePattern(z, QueryEncoding(s"$ub#subOrganizationOf"), y), // ?Z ub:subOrganizationOf ?Y + TriplePattern(z, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#Department")), // ?Z rdf:type ub:Department + TriplePattern(y, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#University")) // ?Y rdf:type ub:University + )), + QuerySpecification(List( + TriplePattern(x, QueryEncoding(s"$ub#worksFor"), QueryEncoding("http://www.Department0.University0.edu")), // ?X ub:worksFor http://www.Department0.University0.edu + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#FullProfessor")), // ?X rdf:type ub:FullProfessor + TriplePattern(x, QueryEncoding(s"$ub#name"), y), // ?X ub:name ?Y1 + TriplePattern(x, QueryEncoding(s"$ub#emailAddress"), z), // ?X ub:emailAddress ?Y2 + TriplePattern(x, QueryEncoding(s"$ub#telephone"), -4) // ?X ub:telephone ?Y3 + )), + QuerySpecification(List( + TriplePattern(x, QueryEncoding(s"$ub#subOrganizationOf"), QueryEncoding("http://www.Department0.University0.edu")), // ?X ub:subOrganizationOf http://www.Department0.University0.edu + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#ResearchGroup")) // ?X rdf:type ub:ResearchGroup + )), + QuerySpecification(List( + TriplePattern(y, QueryEncoding(s"$ub#subOrganizationOf"), QueryEncoding("http://www.University0.edu")), // ?Y ub:subOrganizationOf http://www.University0.edu + TriplePattern(y, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#Department")), //?Y rdf:type ub:Department + TriplePattern(x, QueryEncoding(s"$ub#worksFor"), y), // ?X ub:worksFor ?Y + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#FullProfessor")) // ?X rdf:type ub:FullProfessor + )), + QuerySpecification(List( + TriplePattern(y, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#FullProfessor")), // ?Y rdf:type ub:FullProfessor + TriplePattern(y, QueryEncoding(s"$ub#teacherOf"), z), // ?Y ub:teacherOf ?Z + TriplePattern(z, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#Course")), // ?Z rdf:type ub:Course + TriplePattern(x, QueryEncoding(s"$ub#advisor"), y), // ?X ub:advisor ?Y + TriplePattern(x, QueryEncoding(s"$ub#takesCourse"), z), // ?X ub:takesCourse ?Z + TriplePattern(x, QueryEncoding(s"$rdf#type"), QueryEncoding(s"$ub#UndergraduateStudent")) // ?X rdf:type ub:UndergraduateStudent + ))) + +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/NewLubmEvaluation.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/NewLubmEvaluation.scala new file mode 100644 index 0000000..cc03d23 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/NewLubmEvaluation.scala @@ -0,0 +1,124 @@ +package com.signalcollect.triplerush.evaluation + +import com.signalcollect.triplerush.TripleRush +import com.signalcollect.nodeprovisioning.torque.TorquePriority +import com.signalcollect.nodeprovisioning.torque.LocalHost +import com.signalcollect.triplerush.optimizers.CleverCardinalityOptimizer +import com.signalcollect.triplerush.TripleRush +import com.signalcollect.triplerush.optimizers.Optimizer +import com.signalcollect.triplerush.Mapping + +object 
NewLubmEvaluation extends App { + + import EvalHelpers._ + import Optimizer._ + + val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data") + def local = new LocalHost + def torquePriority = TorquePriority.fast + def runs = 1 + def warmupRepetitions = 0 + def shouldCleanGarbage = false + def description = "Clever vs. predicate selectivity on LUBM640." + + var evaluation = new Evaluation( + executionHost = kraken(torquePriority)).addResultHandler(googleDocs) + + // var evaluation = new Evaluation( + // executionHost = local).addResultHandler(googleDocs) + + for (numberOfNodes <- List(4)) { + for (universities <- List(640)) { //10, 20, 40, 80, 160, 320, 480, 800 + for (run <- 1 to runs) { + for (optimizer <- List(clever, predicateSelectivity)) { //clever,predicateSelectivity,bibekPredicateSelectivity + val eval = new LubmEvalRun( + description, + shouldCleanGarbage, + universities, + numberOfNodes, + torquePriority, + warmupRepetitions, + optimizer, + getRevision) + evaluation = evaluation.addEvaluationRun(eval.evaluationRun _) + } + } + } + } + evaluation.execute +} + +case class LubmEvalRun( + description: String, + shouldCleanGarbage: Boolean, + universities: Int, + numberOfNodes: Int, + torquePriority: String, + warmupRepetitions: Int, + optimizerCreator: TripleRush => Option[Optimizer], + revision: String) extends TriplerushEval { + + import EvalHelpers._ + + def evaluationRun: List[Map[String, String]] = { + val tr = initializeTr(initializeGraphBuilder) + val loadingTime = measureTime { + println("Dispatching loading command to workers...") + loadLubm(universities, tr) + tr.prepareExecution + } + + val optimizerInitStart = System.nanoTime + val optimizer = optimizerCreator(tr) + val optimizerInitEnd = System.nanoTime + val queries = LubmQueries.fullQueries + var finalResults = List[Map[String, String]]() + var commonResults = baseStats + + commonResults += ((s"optimizerInitialisationTime", roundToMillisecondFraction(optimizerInitEnd - optimizerInitStart).toString)) + commonResults += ((s"optimizerName", optimizer.toString)) + commonResults += (("loadingTime", loadingTime.toString)) + commonResults += s"loadNumber" -> universities.toString + commonResults += s"dataSet" -> s"lubm$universities" + + println("Starting warm-up...") + + for (i <- 1 to warmupRepetitions) { + println(s"running warmup $i/$warmupRepetitions") + for (query <- queries) { + tr.executeAdvancedQuery(query, optimizer) + tr.awaitIdle + } + } + println(s"warmup finished") + + if (shouldCleanGarbage) { + cleanGarbage + } + println(s"Finished warm-up.") + for (queryId <- 1 to queries.size) { + println(s"Running evaluation for query $queryId.") + val result = runEvaluation(queries(queryId - 1), queryId.toString, optimizer, tr, commonResults) + finalResults = result :: finalResults + println(s"Done running evaluation for query $queryId. 
Awaiting idle") + tr.awaitIdle + println("Idle") + } + tr.shutdown + finalResults + } + + def loadLubm(universities: Int, triplerush: TripleRush) { + val lubmFolderName = s"lubm$universities-filtered-splits" + for (splitId <- 0 until 2880) { + val splitFile = s"./$lubmFolderName/$splitId.filtered-split" + triplerush.loadBinary(splitFile, Some(splitId)) + if (splitId % 288 == 279) { + println(s"Dispatched up to split #$splitId/2880, awaiting idle.") + triplerush.awaitIdle + println(s"Continuing graph loading...") + } + } + } + +} diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/OldDbpsbBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/OldDbpsbBenchmark.scala new file mode 100644 index 0000000..dcdf036 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/OldDbpsbBenchmark.scala @@ -0,0 +1,418 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.File +//import java.util.Date +//import java.util.concurrent.TimeUnit +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import scala.io.Source +//import scala.util.Random +//import com.signalcollect.GraphBuilder +//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory +//import com.signalcollect.nodeprovisioning.torque.LocalHost +//import com.signalcollect.nodeprovisioning.torque.TorqueHost +//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +//import com.signalcollect.nodeprovisioning.torque.TorquePriority +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.QueryParticle +//import com.signalcollect.triplerush.QueryEngine +//import com.signalcollect.triplerush.QueryOptimizer +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.Mapping +//import akka.event.Logging +//import com.signalcollect.triplerush.QueryResult +// +///** +// * Runs a PageRank algorithm on a graph of a fixed size +// * for different numbers of worker threads. +// * +// * Evaluation is set to execute on a 'Kraken'-node. 
+// */
+//object OldDbpsbBenchmark extends App {
+// def jvmHighThroughputGc = " -Xmx64000m" +
+// " -Xms64000m" +
+// " -Xmn8000m" +
+// " -d64" +
+// " -XX:+UnlockExperimentalVMOptions" +
+// " -XX:+UseConcMarkSweepGC" +
+// " -XX:+UseParNewGC" +
+// " -XX:+CMSIncrementalPacing" +
+// " -XX:+CMSIncrementalMode" +
+// " -XX:ParallelGCThreads=20" +
+// " -XX:ParallelCMSThreads=20" +
+// " -XX:-PrintCompilation" +
+// " -XX:-PrintGC" +
+// " -Dsun.io.serialization.extendedDebugInfo=true" +
+// " -XX:MaxInlineSize=1024"
+//
+// def jvmParameters = " -Xmx64000m" +
+// " -Xms64000m"
+// def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar"
+// val assemblyFile = new File(assemblyPath)
+// // val jobId = Random.nextInt % 10000
+// // def copyName = assemblyPath.replace("-SNAPSHOT", jobId.toString)
+// // assemblyFile.renameTo(new File(assemblyPath))
+// val kraken = new TorqueHost(
+// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"),
+// localJarPath = assemblyPath, jvmParameters = jvmHighThroughputGc, priority = TorquePriority.superfast)
+// val localHost = new LocalHost
+// val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data")
+//
+// def getRevision: String = {
+// try {
+// val gitLogPath = ".git/logs/HEAD"
+// val gitLog = new File(gitLogPath)
+// val lines = Source.fromFile(gitLogPath).getLines
+// val lastLine = lines.toList.last
+// val revision = lastLine.split(" ")(1)
+// revision
+// } catch {
+// case t: Throwable => "Unknown revision."
+// }
+// }
+//
+// /*********/
+// def evalName = "DBPSB eval."
+// // def evalName = "Local debugging."
+// def runs = 1
+// var evaluation = new Evaluation(evaluationName = evalName, executionHost = kraken).addResultHandler(googleDocs)
+// // var evaluation = new Evaluation(evaluationName = evalName, executionHost = localHost).addResultHandler(googleDocs)
+// /*********/
+//
+// for (run <- 1 to runs) {
+// // for (queryId <- 1 to 1) {
+// for (optimizer <- List(QueryOptimizer.Clever)) {
+// //for (tickets <- List(1000, 10000, 100000, 1000000)) {
+// //evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, true, tickets))
+// // evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, false, tickets))
+// // }
+// evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(
+// evalName,
+// //queryId,
+// false,
+// Long.MaxValue,
+// optimizer,
+// getRevision))
+// }
+// // }
+// }
+// evaluation.execute
+//
+// def lubmBenchmarkRun(
+// description: String,
+// //queryId: Int,
+// sampling: Boolean,
+// tickets: Long,
+// optimizer: Int,
+// revision: String)(): List[Map[String, String]] = {
+//
+// /**
+// * Queries from Trinity.RDF paper
+// *
+// *
+// * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+// * SELECT ?title
+// * WHERE {
+// * ?game <http://www.w3.org/2004/02/skos/core#subject> <http://dbpedia.org/resource/Category:First-person_shooters> .
+// * ?game foaf:name ?title .
+// * }
+// * -----
+// * SELECT ?var
+// * WHERE {
+// * ?var3 <http://xmlns.com/foaf/0.1/homepage> ?var2 .
+// * ?var3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?var .
+// * }
+// * ------
+// *
+// * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+// * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+// * SELECT ?name ?description ?musician WHERE {
+// * ?musician <http://www.w3.org/2004/02/skos/core#subject> <http://dbpedia.org/resource/Category:German_musicians> .
+// * ?musician foaf:name ?name .
+// * ?musician rdfs:comment ?description .
+// * }
+// * -------
+// * PREFIX dbo: <http://dbpedia.org/ontology/>
+// * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+// * SELECT ?name ?birth ?death ?person WHERE {
+// * ?person dbo:birthPlace <http://dbpedia.org/resource/Berlin> .
+// * ?person dbo:birthDate ?birth .
+// * ?person foaf:name ?name .
+// * ?person dbo:deathDate ?death .
+// * }
+// * -----
+// *
+// * PREFIX dbo: <http://dbpedia.org/ontology/>
+// * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+// * SELECT ?manufacturer ?name ?car
+// * WHERE {
+// * ?car <http://www.w3.org/2004/02/skos/core#subject> <http://dbpedia.org/resource/Category:Luxury_vehicles> .
+// * ?car foaf:name ?name .
+// * ?car dbo:manufacturer ?man .
+// * ?man foaf:name ?manufacturer
+// * }
+// *
+// * ---------
+// * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+// * PREFIX dbpprop: <http://dbpedia.org/property/>
+// * SELECT ?var0 ?var1 ?var2 ?var3 where
+// * {
+// * ?var6 rdf:type ?var.
+// * ?var6 dbpprop:name ?var0.
+// * ?var6 dbpprop:pages ?var1.
+// * ?var6 dbpprop:isbn ?var2.
+// * ?var6 dbpprop:author ?var3.
+// * }
+// * -----
+// * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+// * PREFIX dbpprop: <http://dbpedia.org/property/>
+// * SELECT ?var where
+// * {
+// * ?var6 rdf:type ?var.
+// * ?var6 dbpprop:name ?var0.
+// * ?var6 dbpprop:pages ?var1.
+// * ?var6 dbpprop:isbn ?var2.
+// * ?var6 dbpprop:author ?var3.
+// * }
+// * -----
+// *
+// * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+// * PREFIX dbpedia2: <http://dbpedia.org/property/>
+// * PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+// * SELECT ?player WHERE {
+// * ?s foaf:page ?player .
+// * ?s rdf:type <http://dbpedia.org/ontology/SoccerPlayer> .
+// * ?s dbpedia2:position ?position .
+// * ?s dbpedia2:clubs ?club .
+// * ?club <http://dbpedia.org/ontology/capacity> ?cap .
+// * ?s <http://dbpedia.org/ontology/birthPlace> ?place .
+// * ?place dbpedia2:population ?pop .
+// * ?s <http://dbpedia.org/ontology/number> ?tricot .
+// * }
+// *
+// * http://www.w3.org/2004/02/skos/core#subject -> 1
+// * http://dbpedia.org/resource/Category:First-person_shooters -> 47406
+// * http://xmlns.com/foaf/0.1/name -> 41
+// * http://xmlns.com/foaf/0.1/homepage -> 653
+// * http://www.w3.org/1999/02/22-rdf-syntax-ns#type -> 16
+// * http://dbpedia.org/resource/Category:German_musicians -> 187543
+// * http://www.w3.org/2000/01/rdf-schema#comment -> 27
+// * http://dbpedia.org/ontology/birthPlace -> 1132
+// * http://dbpedia.org/resource/Berlin -> 19706
+// * http://dbpedia.org/ontology/birthDate -> 436
+// * http://dbpedia.org/ontology/deathDate -> 1177
+// * http://dbpedia.org/resource/Category:Luxury_vehicles -> 322352
+// * http://dbpedia.org/ontology/manufacturer -> 11736
+// * http://www.w3.org/1999/02/22-rdf-syntax-ns#type -> 16
+// * http://dbpedia.org/property/name -> 30
+// * http://dbpedia.org/property/pages -> 37409
+// * http://dbpedia.org/property/isbn -> 3385
+// * http://dbpedia.org/property/author -> 3371
+// * http://xmlns.com/foaf/0.1/page -> 39
+// * http://dbpedia.org/ontology/SoccerPlayer -> 1723
+// * http://dbpedia.org/property/position -> 397
+// * http://dbpedia.org/property/clubs -> 1709
+// * http://dbpedia.org/ontology/capacity -> 6306
+// * http://dbpedia.org/property/population -> 966
+// * http://dbpedia.org/ontology/number -> 411
+// */
+// def fullQueries: List[QueryParticle] = List(
+// QueryParticle(queryId = 1,
+// unmatched = Array(TriplePattern(-1, 1, 47406), TriplePattern(-1, 41, -2)),
+// bindings = new Array(2)),
+// QueryParticle(2, Array(TriplePattern(-1, 653, -2), TriplePattern(-1, 16, -3)),
+// bindings = new Array(3)),
+// QueryParticle(3, Array(TriplePattern(-1, 1, 187543), TriplePattern(-1, 41, -2), TriplePattern(-1, 27, -3)),
+// bindings = new Array(3)),
+// QueryParticle(4, Array(TriplePattern(-1, 1132, 19706), TriplePattern(-1, 436, -2), TriplePattern(-1, 41, -3), TriplePattern(-1, 1177, -4)),
+// bindings = new Array(4)),
+// QueryParticle(5, Array(TriplePattern(-1, 1, 322352), TriplePattern(-1, 41, -2), TriplePattern(-1, 11736, -3), TriplePattern(-3, 41, -4)),
+// bindings = new Array(4)),
+// QueryParticle(6, Array(TriplePattern(-1, 16, -2), TriplePattern(-1, 30, -3), TriplePattern(-1, 37409, -4), TriplePattern(-1, 3385, -5), TriplePattern(-1, 3371, -6)),
+// bindings = new Array(6)),
+// QueryParticle(7, Array(TriplePattern(-1, 16, -2), TriplePattern(-1, 30, -3), TriplePattern(-1, 37409, -4), TriplePattern(-1, 3385, -5),
TriplePattern(-1, 3371, -6)),
+// bindings = new Array(6)),
+// QueryParticle(8, Array(TriplePattern(-1, 39, -2), TriplePattern(-1, 16, 1723), TriplePattern(-1, 397, -3), TriplePattern(-1, 1709, -4), TriplePattern(-4, 6306, -5), TriplePattern(-1, 1132, -6), TriplePattern(-6, 966, -7), TriplePattern(-1, 411, -8)),
+// bindings = new Array(8)))
+//
+// val queries = {
+// require(!sampling && tickets == Long.MaxValue)
+// fullQueries
+// }
+//
+// var baseResults = Map[String, String]()
+// val qe = new QueryEngine(GraphBuilder.withMessageBusFactory(
+// new BulkAkkaMessageBusFactory(1024, false)).
+// withMessageSerialization(false).
+// withAkkaMessageCompression(true))
+// // withLoggingLevel(Logging.DebugLevel).
+// // withConsole(true, 8080).
+// // withNodeProvisioner(new TorqueNodeProvisioner(
+// // torqueHost = new TorqueHost(
+// // jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"),
+// // localJarPath = assemblyPath,
+// // jvmParameters = jvmHighThroughputGc,
+// // priority = TorquePriority.fast),
+// // numberOfNodes = 10)))
+//
+// def loadSmallLubm {
+// val smallLubmFolderName = "lubm160-filtered-splits"
+// for (splitId <- 0 until 2880) {
+// qe.loadBinary(s"./$smallLubmFolderName/$splitId.filtered-split", Some(splitId))
+// if (splitId % 288 == 279) {
+// println(s"Dispatched up to split #$splitId/2880, awaiting idle.")
+// qe.awaitIdle
+// println(s"Continuing graph loading...")
+// }
+// }
+// println("Query engine preparing query execution.")
+// qe.prepareQueryExecution
+// println("Query engine ready.")
+// }
+//
+// def loadLargeLubm {
+// val largeLubmFolderName = "/home/torque/tmp/lubm10240-filtered-splits"
+// for (splitId <- 0 until 2880) {
+// qe.loadBinary(s"$largeLubmFolderName/$splitId.filtered-split", Some(splitId))
+// if (splitId % 288 == 279) {
+// println(s"Dispatched up to split #$splitId/2880, awaiting idle.")
+// qe.awaitIdle
+// println(s"Continuing graph loading...")
+// }
+// }
+// println("Query engine preparing query execution.")
+// qe.prepareQueryExecution
+// }
+//
+// /**
+// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'.
+// */
+// def measureTime(codeBlock: => Unit): Long = {
+// val startTime = System.currentTimeMillis
+// codeBlock
+// val finishTime = System.currentTimeMillis
+// finishTime - startTime
+// }
+//
+// def roundToMillisecondFraction(nanoseconds: Long): Double = {
+// ((nanoseconds / 100000.0).round) / 10.0
+// }
+//
+// def executeOnQueryEngine(q: QueryParticle): QueryResult = {
+// val resultFuture = qe.executeQuery(q, optimizer)
+// try {
+// Await.result(resultFuture, new FiniteDuration(1000, TimeUnit.SECONDS)) // TODO handle exception
+// } catch {
+// case t: Throwable =>
+// println(s"Query $q timed out!")
+// QueryResult(List(), Array("exception"), Array(t))
+// }
+// }
+//
+// /**
+// * Goes toward JVM JIT steady state by executing each of the seven queries five times.
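+// * Measured runs then exercise JIT-compiled code instead of paying
+// * interpretation and compilation overhead inside the timed section.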
+// */ +// def jitSteadyState { +// for (i <- 1 to 5) { +// for (queryId <- 1 to 7) { +// val queryIndex = queryId - 1 +// val query = fullQueries(queryIndex) +// print(s"Warming up with query $query ...") +// executeOnQueryEngine(query) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.gc +// Thread.sleep(100) +// } +// Thread.sleep(10000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val startTime = System.nanoTime +// val queryResult = executeOnQueryEngine(query) +// val queryStats: Map[Any, Any] = (queryResult.statKeys zip queryResult.statVariables).toMap.withDefaultValue("") +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val timeToFirstResult = roundToMillisecondFraction(queryStats("firstResultNanoTime").asInstanceOf[Long] - startTime) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// runResult += s"revision" -> revision +// runResult += s"queryId" -> queryId.toString +// runResult += s"optimizer" -> optimizer.toString +// runResult += s"queryCopyCount" -> queryStats("queryCopyCount").toString +// runResult += s"query" -> queryStats("optimizedQuery").toString +// runResult += s"exception" -> queryStats("exception").toString +// runResult += s"results" -> queryResult.queries.length.toString +// runResult += s"samplingQuery" -> query.isSamplingQuery.toString +// runResult += s"tickets" -> query.tickets.toString +// runResult += s"executionTime" -> executionTime.toString +// runResult += s"timeUntilFirstResult" -> timeToFirstResult.toString +// runResult += s"optimizingTime" -> optimizingTime.toString +// runResult += s"totalMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory).toString +// runResult += s"freeMemory" -> bytesToGigabytes(Runtime.getRuntime.freeMemory).toString +// runResult += s"usedMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString +// runResult += s"executionHostname" -> java.net.InetAddress.getLocalHost.getHostName +// runResult += s"loadNumber" -> 160.toString +// runResult += s"date" -> date.toString +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += "evaluationDescription" -> description +// val loadingTime = measureTime { +// println("Dispatching loading command to worker...") +// loadSmallLubm +// //loadLargeLubm +// qe.awaitIdle +// } +// baseResults += "loadingTime" -> loadingTime.toString +// +// println("Starting warm-up...") +// jitSteadyState +// //cleanGarbage +// println(s"Finished warm-up.") +// for (queryId <- 1 to 7) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. 
Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// +// qe.shutdown +// finalResults +// } +// +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/OptimizerBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/OptimizerBenchmark.scala new file mode 100644 index 0000000..8bad487 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/OptimizerBenchmark.scala @@ -0,0 +1,391 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.File +//import java.util.Date +//import java.util.concurrent.TimeUnit +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import scala.io.Source +//import scala.util.Random +//import com.signalcollect.GraphBuilder +//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory +//import com.signalcollect.nodeprovisioning.torque.LocalHost +//import com.signalcollect.nodeprovisioning.torque.TorqueHost +//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner +//import com.signalcollect.nodeprovisioning.torque.TorquePriority +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.QueryParticle +//import com.signalcollect.triplerush.QueryEngine +//import com.signalcollect.triplerush.vertices.QueryOptimizer +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.Mapping +//import akka.event.Logging +//import com.signalcollect.triplerush.vertices.QueryResult +//import com.signalcollect.triplerush.QuerySpecification +//import scala.collection.mutable.UnrolledBuffer +//import java.lang.management.ManagementFactory +//import collection.JavaConversions._ +//import language.postfixOps +// +//object OptimizerBenchmark extends App { +// def jvmParameters = " -Xmx31000m" + +// " -Xms31000m" + +// " -XX:+UnlockExperimentalVMOptions" + +// " -XX:+UseConcMarkSweepGC" + +// " -XX:+UseParNewGC" + +// " -XX:+CMSIncrementalPacing" + +// " -XX:+CMSIncrementalMode" + +// " -XX:ParallelGCThreads=20" + +// " -XX:ParallelCMSThreads=20" + +// " -XX:+AggressiveOpts" + +// " -XX:+AlwaysPreTouch" + +// " -XX:+UseNUMA" + +// " -XX:-UseBiasedLocking" + +// " -XX:MaxInlineSize=1024" +// +// def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" +// val assemblyFile = new File(assemblyPath) +// val kraken = new TorqueHost( +// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// localJarPath = assemblyPath, jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = TorquePriority.fast) +// val local = new LocalHost +// val googleDocs = new GoogleDocsResultHandler(args(0), 
args(1), "triplerush", "data") +// +// def getRevision: String = { +// try { +// val gitLogPath = ".git/logs/HEAD" +// val gitLog = new File(gitLogPath) +// val lines = Source.fromFile(gitLogPath).getLines +// val lastLine = lines.toList.last +// val revision = lastLine.split(" ")(1) +// revision +// } catch { +// case t: Throwable => "Unknown revision." +// } +// } +// +// /*********/ +// def evalName = s"LUBM Optimizer Tests with Parallel GC." +// def runs = 1 +// // var evaluation = new Evaluation(evaluationName = evalName, executionHost = kraken).addResultHandler(googleDocs) +// var evaluation = new Evaluation(evaluationName = evalName, executionHost = local).addResultHandler(googleDocs) +// /*********/ +// +// for (run <- 1 to runs) { +// for (unis <- List(800)) { +// for (optimizer <- List(QueryOptimizer.None)) { +// evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun( +// evalName, +// false, +// Long.MaxValue, +// optimizer, +// getRevision, +// unis)) +// } +// } +// } +// evaluation.execute +// +// def lubmBenchmarkRun( +// description: String, +// sampling: Boolean, +// tickets: Long, +// optimizer: Int, +// revision: String, +// universities: Int)(): List[Map[String, String]] = { +// +// /** +// * Queries from: http://www.cs.rpi.edu/~zaki/PaperDir/WWW10.pdf +// * Result sizes from: http://research.microsoft.com/pubs/183717/Trinity.RDF.pdf +// * L1 L2 L3 L4 L5 L6 L7 +// * LUBM-160 397 173040 0 10 10 125 7125 +// * LUBM-10240 2502 11016920 0 10 10 125 450721 +// * +// * Times Trinity: 281 132 110 5 4 9 630 +// */ +// val x = -1 +// val y = -2 +// val z = -3 +// val m = Map( +// "rdf:type" -> 1, +// "ub:GraduateStudent" -> 2013, +// "ub:undergraduateDegreeFrom" -> 22, +// "ub:memberOf" -> 415, +// "ub:Department" -> 11, +// "ub:subOrganizationOf" -> 13, +// "ub:University" -> 7, +// "ub:Course" -> 3067, +// "ub:name" -> 8, +// "ub:UndergraduateStudent" -> 413, +// "ub:worksFor" -> 27, +// "http://www.Department0.University0.edu" -> 10, +// "ub:FullProfessor" -> 15, +// "ub:emailAddress" -> 28, +// "ub:telephone" -> 30, +// "ub:ResearchGroup" -> 2575, +// "http://www.University0.edu" -> 6, +// "ub:teacherOf" -> 17, +// "ub:advisor" -> 430, +// "ub:takesCourse" -> 417) +// def fullQueries: List[QuerySpecification] = List( +// QuerySpecification(1, Array( +// TriplePattern(x, m("rdf:type"), m("ub:GraduateStudent")), // ?X rdf:type ub:GraduateStudent +// TriplePattern(x, m("ub:undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y +// TriplePattern(x, m("ub:memberOf"), z), // ?X ub:memberOf ?Z +// TriplePattern(z, m("rdf:type"), m("ub:Department")), // ?Z rdf:type ub:Department +// TriplePattern(z, m("ub:subOrganizationOf"), y), // ?Z ub:subOrganizationOf ?Y +// TriplePattern(y, m("rdf:type"), m("ub:University")) // ?Y rdf:type ub:University +// ), +// new Array(3)), +// QuerySpecification(2, Array( +// TriplePattern(y, m("rdf:type"), m("ub:University")), // ?Y rdf:type ub:University +// TriplePattern(z, m("ub:subOrganizationOf"), y), // ?Z ub:subOrganizationOf ?Y +// TriplePattern(z, m("rdf:type"), m("ub:Department")), // ?Z rdf:type ub:Department +// TriplePattern(x, m("ub:memberOf"), z), // ?X ub:memberOf ?Z +// TriplePattern(x, m("ub:undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y +// TriplePattern(x, m("rdf:type"), m("ub:GraduateStudent")) // ?X rdf:type ub:GraduateStudent +// ), +// new Array(3)), +// QuerySpecification(3, Array( +// TriplePattern(y, m("rdf:type"), m("ub:University")), // ?Y rdf:type ub:University +// TriplePattern(x, 
m("ub:undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y +// TriplePattern(x, m("rdf:type"), m("ub:GraduateStudent")), // ?X rdf:type ub:GraduateStudent +// TriplePattern(x, m("ub:memberOf"), z), // ?X ub:memberOf ?Z +// TriplePattern(z, m("rdf:type"), m("ub:Department")), // ?Z rdf:type ub:Department +// TriplePattern(z, m("ub:subOrganizationOf"), y) // ?Z ub:subOrganizationOf ?Y +// ), +// new Array(3)), +// QuerySpecification(4, Array( +// TriplePattern(x, m("rdf:type"), m("ub:GraduateStudent")), // ?X rdf:type ub:GraduateStudent +// TriplePattern(x, m("ub:undergraduateDegreeFrom"), y), // ?X ub:undergraduateDegreeFrom ?Y +// TriplePattern(x, m("ub:memberOf"), z), // ?X ub:memberOf ?Z +// TriplePattern(y, m("rdf:type"), m("ub:University")), // ?Y rdf:type ub:University +// TriplePattern(z, m("rdf:type"), m("ub:Department")), // ?Z rdf:type ub:Department +// TriplePattern(z, m("ub:subOrganizationOf"), y) // ?Z ub:subOrganizationOf ?Y +// ), +// new Array(3))) +// val queries = { +// require(!sampling && tickets == Long.MaxValue) +// fullQueries +// } +// +// var baseResults = Map[String, String]() +// val kraken = new TorqueHost( +// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// localJarPath = assemblyPath, jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = TorquePriority.fast) +// // +// val graphBuilder = GraphBuilder. +// // withLoggingLevel(Logging.DebugLevel). +// withNodeProvisioner(new TorqueNodeProvisioner(kraken, 8)) +// val qe = new QueryEngine(graphBuilder) +// +// def loadLubm { +// val lubmFolderName = s"lubm$universities-filtered-splits" +// for (splitId <- 0 until 2880) { +// qe.loadBinary(s"./$lubmFolderName/$splitId.filtered-split", Some(splitId)) +// if (splitId % 288 == 279) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading...") +// } +// } +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. +// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def executeOnQueryEngine(q: QuerySpecification): QueryResult = { +// val resultFuture = qe.executeQuery(q, optimizer) +// try { +// Await.result(resultFuture, new FiniteDuration(1000, TimeUnit.SECONDS)) // TODO handle exception +// } catch { +// case t: Throwable => +// println(s"Query $q timed out!") +// QueryResult(UnrolledBuffer(), Array("exception"), Array(t)) +// } +// } +// +// def jitRepetitions = 100 +// +// /** +// * Go to JVM JIT steady state by executing the queries multiple times. +// */ +// def jitSteadyState { +// for (i <- 1 to jitRepetitions) { +// for (queryId <- 1 to 4) { +// val queryIndex = queryId - 1 +// val query = fullQueries(queryIndex) +// print(s"Warming up with query $query ...") +// executeOnQueryEngine(query) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// lazy val gcs = ManagementFactory.getGarbageCollectorMXBeans +// +// def getGcCollectionTime: Long = { +// gcs map (_.getCollectionTime) sum +// } +// +// def lastGcId: Long = { +// val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) +// val gcIds = sunGcs. +// map(_.getLastGcInfo). 
+// flatMap(info => if (info != null) Some(info.getId) else None) +// if (gcIds.isEmpty) 0 else gcIds.max +// } +// +// def freedDuringLastGc: Long = { +// val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) +// val usedBeforeLastGc = sunGcs. +// map(_.getLastGcInfo). +// map(_.getMemoryUsageBeforeGc). +// flatMap(_.values). +// map(_.getCommitted). +// sum +// val usedAfterLastGc = sunGcs. +// map(_.getLastGcInfo). +// map(_.getMemoryUsageAfterGc). +// flatMap(_.values). +// map(_.getCommitted). +// sum +// val freedDuringLastGc = usedBeforeLastGc - usedAfterLastGc +// freedDuringLastGc +// } +// +// def getGcCollectionCount: Long = { +// gcs map (_.getCollectionCount) sum +// } +// +// lazy val compilations = ManagementFactory.getCompilationMXBean +// +// lazy val javaVersion = ManagementFactory.getRuntimeMXBean.getVmVersion +// +// lazy val jvmLibraryPath = ManagementFactory.getRuntimeMXBean.getLibraryPath +// +// lazy val jvmArguments = ManagementFactory.getRuntimeMXBean.getInputArguments +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.runFinalization +// System.gc +// Thread.sleep(10000) +// } +// Thread.sleep(120000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val gcTimeBefore = getGcCollectionTime +// val gcCountBefore = getGcCollectionCount +// val compileTimeBefore = compilations.getTotalCompilationTime +// runResult += s"totalMemoryBefore" -> bytesToGigabytes(Runtime.getRuntime.totalMemory).toString +// runResult += s"freeMemoryBefore" -> bytesToGigabytes(Runtime.getRuntime.freeMemory).toString +// runResult += s"usedMemoryBefore" -> bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString +// val startTime = System.nanoTime +// val queryResult = executeOnQueryEngine(query) +// val finishTime = System.nanoTime +// val queryStats: Map[Any, Any] = (queryResult.statKeys zip queryResult.statVariables).toMap.withDefaultValue("") +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// val gcTimeAfter = getGcCollectionTime +// val gcCountAfter = getGcCollectionCount +// val gcTimeDuringQuery = gcTimeAfter - gcTimeBefore +// val gcCountDuringQuery = gcCountAfter - gcCountBefore +// val compileTimeAfter = compilations.getTotalCompilationTime +// val compileTimeDuringQuery = compileTimeAfter - compileTimeBefore +// runResult += s"revision" -> revision +// runResult += s"queryId" -> queryId.toString +// runResult += s"optimizer" -> optimizer.toString +// runResult += s"queryCopyCount" -> queryStats("queryCopyCount").toString +// runResult += s"query" -> queryStats("optimizedQuery").toString +// runResult += s"exception" -> queryStats("exception").toString +// runResult += s"results" -> queryResult.bindings.length.toString +// runResult += s"executionTime" -> executionTime.toString +// runResult += s"optimizingTime" -> optimizingTime.toString +// runResult += s"totalMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory).toString +// runResult += s"freeMemory" -> bytesToGigabytes(Runtime.getRuntime.freeMemory).toString +// runResult += s"usedMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString +// runResult += s"executionHostname" -> 
java.net.InetAddress.getLocalHost.getHostName +// runResult += "gcTimeAfter" -> gcTimeAfter.toString +// runResult += "gcCountAfter" -> gcCountAfter.toString +// runResult += "gcTimeDuringQuery" -> gcTimeDuringQuery.toString +// runResult += "gcCountDuringQuery" -> gcCountDuringQuery.toString +// runResult += "compileTimeAfter" -> compileTimeAfter.toString +// runResult += "compileTimeDuringQuery" -> compileTimeDuringQuery.toString +// runResult += s"loadNumber" -> universities.toString +// runResult += s"date" -> date.toString +// runResult += s"dataSet" -> s"lubm$universities" +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += "evaluationDescription" -> description +// baseResults += "jitRepetitions" -> jitRepetitions.toString +// baseResults += "java.runtime.version" -> System.getProperty("java.runtime.version") +// baseResults += "javaVmVersion" -> javaVersion +// baseResults += "jvmLibraryPath" -> jvmLibraryPath +// baseResults += "jvmArguments" -> jvmArguments.mkString(" ") +// +// val loadingTime = measureTime { +// println("Dispatching loading command to worker...") +// loadLubm +// qe.awaitIdle +// } +// baseResults += "loadingTime" -> loadingTime.toString +// +// println("Starting warm-up...") +// jitSteadyState +// cleanGarbage +// println(s"Finished warm-up.") +// for (queryId <- 1 to 4) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// qe.shutdown +// finalResults +// } +// +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/PatternBuilder.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/PatternBuilder.scala new file mode 100644 index 0000000..95122f5 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/PatternBuilder.scala @@ -0,0 +1,213 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation + +import java.util.HashMap +import collection.JavaConversions._ +import scala.io.Source +import com.signalcollect.triplerush.TriplePattern +import scala.io.Codec + +// def fullQueries: List[PatternQuery] = List( +// SELECT ? "X" ? "Y" ? "Z" WHERE ( +// | - "X" - s"$rdf#type" - s"$ub#GraduateStudent", +// | - "X" - s"$ub#undergraduateDegreeFrom" - "Y", +// | - "X" - s"$ub#memberOf" - "Z", +// | - "Z" - s"$rdf#type" - s"$ub#Department", +// | - "Z" - s"$ub#subOrganizationOf" - "Y", +// | - "Y" - s"$rdf#type" - s"$ub#University"), +// SELECT ? "X" ? "Y" WHERE ( +// | - "X" - s"$rdf#type" - s"$ub#Course", +// | - "X" - s"$ub#name" - "Y"), +// SELECT ? "X" ? "Y" ? 
"Z" WHERE ( +// | - "X" - s"$ub#undergraduateDegreeFrom" - "Y", +// | - "X" - s"$rdf#type" - s"$ub#UndergraduateStudent", +// | - "X" - s"$ub#memberOf" - "Z", +// | - "Z" - s"$ub#subOrganizationOf" - "Y", +// | - "Z" - s"$rdf#type" - s"$ub#Department", +// | - "Y" - s"$rdf#type" - s"$ub#University"), +// SELECT ? "X" ? "Y1" ? "Y2" ? "Y3" WHERE ( +// | - "X" - s"$ub#worksFor" - "http://www.Department0.University0.edu", +// | - "X" - s"$rdf#type" - s"$ub#FullProfessor", +// | - "X" - s"$ub#name" - "Y1", +// | - "X" - s"$ub#emailAddress" - "Y2", +// | - "X" - s"$ub#telephone" - "Y3"), +// SELECT ? "X" WHERE ( +// | - "X" - s"$ub#subOrganizationOf" - "http://www.Department0.University0.edu", +// | - "X" - s"$rdf#type" - s"$ub#ResearchGroup"), +// SELECT ? "X" ? "Y" WHERE ( +// | - "Y" - s"$ub#subOrganizationOf" - "http://www.University0.edu", +// | - "Y" - s"$rdf#type" - s"$ub#Department", +// | - "X" - s"$ub#worksFor" - "Y", +// | - "X" - s"$rdf#type" - s"$ub#FullProfessor"), +// SELECT ? "X" ? "Y" ? "Z" WHERE ( +// | - "Y" - s"$rdf#type" - s"$ub#FullProfessor", +// | - "Y" - s"$ub#teacherOf" - "Z", +// | - "Z" - s"$rdf#type" - s"$ub#Course", +// | - "X" - s"$ub#advisor" - "Y", +// | - "X" - s"$ub#takesCourse" - "Z", +// | - "X" - s"$rdf#type" - s"$ub#UndergraduateStudent")) + +case class TextTriplePattern(s: String, p: String, o: String) + +object LubmQueryBuilder extends App { + val ub = "http://swat.cse.lehigh.edu/onto/univ-bench.owl" + val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns" + + val builder = new PatternBuilder("./lubm160/dictionary.txt") + val lubmQuery1Patterns = builder.build( + List( + TextTriplePattern("?X", s"$rdf#type", s"$ub#GraduateStudent"), + TextTriplePattern("?X", s"$ub#undergraduateDegreeFrom", "?Y"), + TextTriplePattern("?X", s"$ub#memberOf", "?Z"), + TextTriplePattern("?Z", s"$rdf#type", s"$ub#Department"), + TextTriplePattern("?Z", s"$ub#subOrganizationOf", "?Y"), + TextTriplePattern("?Y", s"$rdf#type", s"$ub#University"))) + val lubmQuery2Patterns = builder.build( + List( + TextTriplePattern("?X", s"$rdf#type", s"$ub#Course"), + TextTriplePattern("?X", s"$ub#name", "?Y"))) + val lubmQuery3Patterns = builder.build( + List( + TextTriplePattern("?X", s"$ub#undergraduateDegreeFrom", "?Y"), + TextTriplePattern("?X", s"$rdf#type", s"$ub#UndergraduateStudent"), + TextTriplePattern("?X", s"$ub#memberOf", "?Z"), + TextTriplePattern("?Z", s"$ub#subOrganizationOf", "?Y"), + TextTriplePattern("?Z", s"$rdf#type", s"$ub#Department"), + TextTriplePattern("?Y", s"$rdf#type", s"$ub#University"))) + val lubmQuery4Patterns = builder.build( + List( + TextTriplePattern("?X", s"$ub#worksFor", "http://www.Department0.University0.edu"), + TextTriplePattern("?X", s"$rdf#type", s"$ub#FullProfessor"), + TextTriplePattern("?X", s"$ub#name", "?Y1"), + TextTriplePattern("?X", s"$ub#emailAddress", "?Y2"), + TextTriplePattern("?X", s"$ub#telephone", "?Y3"))) + val lubmQuery5Patterns = builder.build( + List( + TextTriplePattern("?X", s"$ub#subOrganizationOf", "http://www.Department0.University0.edu"), + TextTriplePattern("?X", s"$rdf#type", s"$ub#ResearchGroup"))) + val lubmQuery6Patterns = builder.build( + List( + TextTriplePattern("?Y", s"$ub#subOrganizationOf", "http://www.University0.edu"), + TextTriplePattern("?Y", s"$rdf#type", s"$ub#Department"), + TextTriplePattern("?X", s"$ub#worksFor", "?Y"), + TextTriplePattern("?X", s"$rdf#type", s"$ub#FullProfessor"))) + val lubmQuery7Patterns = builder.build( + List( + TextTriplePattern("?Y", s"$rdf#type", s"$ub#FullProfessor"), + 
TextTriplePattern("?Y", s"$ub#teacherOf", "?Z"), + TextTriplePattern("?Z", s"$rdf#type", s"$ub#Course"), + TextTriplePattern("?X", s"$ub#advisor", "?Y"), + TextTriplePattern("?X", s"$ub#takesCourse", "?Z"), + TextTriplePattern("?X", s"$rdf#type", s"$ub#UndergraduateStudent"))) + +// SELECT ? "X" ? "Y" WHERE ( +// | - "X" - s"$rdf#type" - s"$ub#Course", +// | - "X" - s"$ub#name" - "Y"), +// SELECT ? "X" ? "Y" ? "Z" WHERE ( +// | - "X" - s"$ub#undergraduateDegreeFrom" - "Y", +// | - "X" - s"$rdf#type" - s"$ub#UndergraduateStudent", +// | - "X" - s"$ub#memberOf" - "Z", +// | - "Z" - s"$ub#subOrganizationOf" - "Y", +// | - "Z" - s"$rdf#type" - s"$ub#Department", +// | - "Y" - s"$rdf#type" - s"$ub#University"), +// SELECT ? "X" ? "Y1" ? "Y2" ? "Y3" WHERE ( +// | - "X" - s"$ub#worksFor" - "http://www.Department0.University0.edu", +// | - "X" - s"$rdf#type" - s"$ub#FullProfessor", +// | - "X" - s"$ub#name" - "Y1", +// | - "X" - s"$ub#emailAddress" - "Y2", +// | - "X" - s"$ub#telephone" - "Y3"), +// SELECT ? "X" WHERE ( +// | - "X" - s"$ub#subOrganizationOf" - "http://www.Department0.University0.edu", +// | - "X" - s"$rdf#type" - s"$ub#ResearchGroup"), +// SELECT ? "X" ? "Y" WHERE ( +// | - "Y" - s"$ub#subOrganizationOf" - "http://www.University0.edu", +// | - "Y" - s"$rdf#type" - s"$ub#Department", +// | - "X" - s"$ub#worksFor" - "Y", +// | - "X" - s"$rdf#type" - s"$ub#FullProfessor"), +// SELECT ? "X" ? "Y" ? "Z" WHERE ( +// | - "Y" - s"$rdf#type" - s"$ub#FullProfessor", +// | - "Y" - s"$ub#teacherOf" - "Z", +// | - "Z" - s"$rdf#type" - s"$ub#Course", +// | - "X" - s"$ub#advisor" - "Y", +// | - "X" - s"$ub#takesCourse" - "Z", +// | - "X" - s"$rdf#type" - s"$ub#UndergraduateStudent")) + + println(lubmQuery1Patterns) + println(lubmQuery2Patterns) + println(lubmQuery3Patterns) + println(lubmQuery4Patterns) + println(lubmQuery5Patterns) + println(lubmQuery6Patterns) + println(lubmQuery7Patterns) +} +// +// final val ISO8859: Codec = new Codec(Charset forName "ISO-8859-1") +// final val UTF8: Codec = new Codec(Charset forName "UTF-8") + +class PatternBuilder(val dictionaryPath: String) { + val dictionary = new HashMap[String, Int]() + //implicit val codec = Codec.ISO8859 + val dictionaryFile = Source.fromFile(dictionaryPath, "UTF-16") + var linesRead = 0 + for (line <- dictionaryFile.getLines) { + val entry = line.split(" ") + if (entry.length == 3) { + dictionary.put(entry(0), Integer.parseInt(entry(2))) + linesRead += 1 + if (linesRead % 10000 == 0) { + println(s"$linesRead dictionary entries loaded.") + } + } else if (entry.length != 0) { + throw new Exception(s"Failed to parse line $line, was parsed to ${entry.toList}.") + } + } + + def build(textPatterns: List[TextTriplePattern]): List[TriplePattern] = { + var nextVariableId = -1 + var variables = Map[String, Int]() + textPatterns map { textPattern => + def getId(entry: String): Int = { + if (entry.startsWith("?")) { + if (variables.contains(entry)) { + variables(entry) + } else { + val id = nextVariableId + variables += entry -> id + nextVariableId -= 1 + id + } + } else { + if (dictionary.containsKey(entry)) { + dictionary.get(entry) + } else { + Int.MaxValue + } + } + } + val sId = getId(textPattern.s) + val pId = getId(textPattern.p) + val oId = getId(textPattern.o) + TriplePattern(sId, pId, oId) + } + } + +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/PatternQueryParser.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/PatternQueryParser.scala new file mode 100644 index 
0000000..e360b4c --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/PatternQueryParser.scala @@ -0,0 +1,100 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import com.signalcollect.triplerush._ +//import scala.collection.JavaConversions._ +//import com.hp.hpl.jena.query._ +//import com.hp.hpl.jena.sparql.syntax._ +//import com.signalcollect.triplerush.TriplePattern +//import com.signalcollect.triplerush.PatternQuery +// +//object PatternQueryParser { +// def build(q: String): Either[PatternQuery, String] = { +// val visitor = new JenaQueryVisitor(q) +// visitor.getPatternQuery +// } +// +// class JenaQueryVisitor(queryString: String) extends ElementVisitor { +// private val jenaQuery = QueryFactory.create(queryString) +// private val queryPatterns = jenaQuery.getQueryPattern +// private var variables: Set[String] = Set() +// private var patterns: List[TriplePattern] = List() +// private var problem: Option[String] = if (!jenaQuery.isStrict) { +// Some("Only strict queries are supported.") +// } else if (jenaQuery.isDistinct) { +// Some("Feature DISTINCT is unsupported.") +// } else if (jenaQuery.isReduced) { +// Some("Feature REDUCED is unsupported.") +// } else if (jenaQuery.isOrdered) { +// Some("Feature ORDERED is unsupported.") +// } else if (jenaQuery.isQueryResultStar) { +// Some("Result variables as * is unsupported.") +// } else { +// None +// } +// def getPatternQuery: Either[PatternQuery, String] = { +// queryPatterns.visit(this) +// if (problem.isDefined) { +// Right(problem.get) +// } else { +// Left(PatternQuery(0, patterns)) +// } +// } +// def visit(el: ElementGroup) { +// for (element <- el.getElements) { +// element.visit(this) +// } +// } +// def visit(el: ElementPathBlock) { +// for (pattern <- el.patternElts) { +// val triple = pattern.asTriple +// val tripleList = List(triple.getSubject, triple.getPredicate, triple.getObject) +// val idList = tripleList map { e => +// if (e.isVariable) { +// Mapping.register(e.getName, isVariable = true) +// } else if (e.isLiteral) { +// Mapping.register(e.getLiteral.toString) +// } else { +// Mapping.register(e.getURI) +// } +// } +// patterns = patterns ::: List(TriplePattern(idList(0), idList(1), idList(2))) +// } +// } +// private def unsupported(el: Element) = throw new UnsupportedOperationException(el.toString) +// def visit(el: ElementTriplesBlock) = unsupported(el) +// def visit(el: ElementFilter) = unsupported(el) +// def visit(el: ElementAssign) = unsupported(el) +// def visit(el: ElementBind) = unsupported(el) +// def visit(el: ElementData) = unsupported(el) +// def visit(el: ElementUnion) = unsupported(el) +// def visit(el: ElementOptional) = unsupported(el) +// def visit(el: ElementDataset) = unsupported(el) +// def visit(el: ElementNamedGraph) = unsupported(el) +// def visit(el: 
ElementExists) = unsupported(el)
+// def visit(el: ElementNotExists) = unsupported(el)
+// def visit(el: ElementMinus) = unsupported(el)
+// def visit(el: ElementService) = unsupported(el)
+// def visit(el: ElementFetch) = unsupported(el)
+// def visit(el: ElementSubQuery) = unsupported(el)
+// }
+//}
\ No newline at end of file
diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/ThroughputLubmBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/ThroughputLubmBenchmark.scala
new file mode 100644
index 0000000..6b12d43
--- /dev/null
+++ b/src/main/scala/com/signalcollect/triplerush/evaluation/ThroughputLubmBenchmark.scala
@@ -0,0 +1,262 @@
+///*
+// * @author Philip Stutz
+// * @author Mihaela Verman
+// *
+// * Copyright 2013 University of Zurich
+// *
+// * Licensed under the Apache License, Version 2.0 (the "License");
+// * you may not use this file except in compliance with the License.
+// * You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// *
+// */
+//
+//package com.signalcollect.triplerush.evaluation
+//
+//import java.io.File
+//import scala.concurrent.Await
+//import scala.concurrent.ExecutionContext.Implicits.global
+//import scala.concurrent.duration.DurationInt
+//import scala.concurrent.future
+//import com.signalcollect.nodeprovisioning.torque.LocalHost
+//import com.signalcollect.nodeprovisioning.torque.TorqueHost
+//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter
+//import com.signalcollect.nodeprovisioning.torque.TorquePriority
+//import com.signalcollect.triplerush.Mapping
+//import com.signalcollect.triplerush.PatternQuery
+//import com.signalcollect.triplerush.QueryEngine
+//import com.signalcollect.triplerush.evaluation.SparqlDsl.SELECT
+//import com.signalcollect.triplerush.evaluation.SparqlDsl.dsl2Query
+//import com.signalcollect.triplerush.evaluation.SparqlDsl.{| => |}
+//import com.signalcollect.triplerush.Mapping
+//
+///**
+// * Measures SPARQL query throughput on a TripleRush query engine by
+// * dispatching many LUBM queries concurrently via futures.
+// *
+// * Evaluation is set to execute on a 'Kraken'-node.
+// */
+//object ThroughputLubmBenchmark extends App {
+// val jvmParameters = " -Xmx64000m" +
+// " -Xms64000m" +
+// " -Xmn8000m" +
+// " -d64" +
+// " -XX:+UnlockExperimentalVMOptions" +
+// " -XX:+UseConcMarkSweepGC" +
+// " -XX:+UseParNewGC" +
+// " -XX:+CMSIncrementalPacing" +
+// " -XX:+CMSIncrementalMode" +
+// " -XX:ParallelGCThreads=20" +
+// " -XX:ParallelCMSThreads=20" +
+// " -XX:MaxInlineSize=1024"
+// // val jvmParameters = " -Xmx64000m" +
+// // " -Xms64000m"
+// val kraken = new TorqueHost(
+// jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"),
+// localJarPath = "./target/triplerush-assembly-1.0-SNAPSHOT.jar", jvmParameters = jvmParameters, priority = TorquePriority.fast)
+// val localHost = new LocalHost
+// val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush-throughput", "data")
+//
+// /*********/
+// def evalName = "Throughput LUBM benchmarking - Futures - Ela"
+// // def evalName = "Local debugging."
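+//
+// // Throughput interpretation (a sketch, based on runEvaluation below): one
+// // measured run dispatches all 160 instances of query 6 (universities 0 to 159)
+// // and awaits every result, so for an executionTime reported in milliseconds:
+// // val queriesPerSecond = 160 / (executionTime / 1000.0)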
+// val runs = 1 +// var evaluation = new Evaluation(evaluationName = evalName, executionHost = kraken).addResultHandler(googleDocs) +// /*********/ +// +// for (run <- 1 to runs) { +// for (queryId <- 1 to 1) { +// //for (tickets <- List(1000, 10000, 100000, 1000000)) { +// //evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, true, tickets)) +// // evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, false, tickets)) +// // } +// evaluation = evaluation.addEvaluationRun(throughputLubmBenchmarkRun(evalName, queryId, false, Long.MaxValue)) +// } +// } +// evaluation.execute +// +// def throughputLubmBenchmarkRun(description: String, queryId: Int, sampling: Boolean, tickets: Long)(): List[Map[String, String]] = { +// val ub = "http://swat.cse.lehigh.edu/onto/univ-bench.owl" +// val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns" +// +// Mapping.setAbbreviations(Map( +// ub -> "ub:", +// rdf -> "rdf:", +// "http://www" -> "www", +// "Department" -> "D", +// "University" -> "U", +// ".edu/" -> "/", +// "FullProfessor" -> "FP", +// "AssociateProfessor" -> "ACP", +// "AssistantProfessor" -> "ASP", +// "Lecturer" -> "L", +// "Undergraduate" -> "UG", +// "Student" -> "S", +// "Graduate" -> "G", +// "ResearchGroup" -> "RG", +// "Publication" -> "P", +// "Course" -> "C", +// "xxx-xxx-xxxx" -> "?", +// "telephone" -> "tel", +// "emailAddress" -> "email", +// "publicationAuthor" -> "author", +// "undergraduateDegreeFrom" -> "UGDF", +// "subOrganizationOf" -> "subOrg")) +// +// /** +// * Queries from: http://www.cs.rpi.edu/~zaki/PaperDir/WWW10.pdf +// * Result sizes from: http://research.microsoft.com/pubs/183717/Trinity.RDF.pdf +// * L1 L2 L3 L4 L5 L6 L7 +// * LUBM-160 397 173040 0 10 10 125 7125 +// * LUBM-10240 2502 11016920 0 10 10 125 450721 +// * +// * Times Trinity: 281 132 110 5 4 9 630 +// * Time TripleR: 3815 222 3126 2 1 2 603 +// */ +// +// def query1: PatternQuery = { +// SELECT ? "X" ? "Y" ? "Z" WHERE ( +// | - "X" - s"$rdf#type" - s"$ub#GraduateStudent", +// | - "X" - s"$ub#undergraduateDegreeFrom" - "Y", +// | - "X" - s"$ub#memberOf" - "Z", +// | - "Z" - s"$rdf#type" - s"$ub#Department", +// | - "Z" - s"$ub#subOrganizationOf" - "Y", +// | - "Y" - s"$rdf#type" - s"$ub#University") +// } +// +// def query6(uniNumberString: String): PatternQuery = { +// SELECT ? "X" ? "Y" WHERE ( +// | - "Y" - s"$ub#subOrganizationOf" - s"http://www.University$uniNumberString.edu", +// | - "Y" - s"$rdf#type" - s"$ub#Department", +// | - "X" - s"$ub#worksFor" - "Y", +// | - "X" - s"$rdf#type" - s"$ub#FullProfessor") +// } +// +// def query7: PatternQuery = { +// SELECT ? "X" ? "Y" ? 
"Z" WHERE ( +// | - "Y" - s"$rdf#type" - s"$ub#FullProfessor", +// | - "Y" - s"$ub#teacherOf" - "Z", +// | - "Z" - s"$rdf#type" - s"$ub#Course", +// | - "X" - s"$ub#advisor" - "Y", +// | - "X" - s"$ub#takesCourse" - "Z", +// | - "X" - s"$rdf#type" - s"$ub#UndergraduateStudent") +// } +// +// def fullThroughputQueries6: List[PatternQuery] = (0 to 159).map(uniNumber => query6(uniNumber.toString())).toList +// +// val warmUpQueries = { +// fullThroughputQueries6 +// } +// +// val evaluatedQueries = { +// fullThroughputQueries6 +// } +// +// var baseResults = Map[String, String]() +// val qe = new QueryEngine() +// +// def loadLubm160 { +// val lubm160FolderName = "lubm160" +// val folder = new File(lubm160FolderName) +// for (file <- folder.listFiles) { +// if (file.getName.startsWith("University") && file.getName.endsWith(".nt")) { +// qe.loadNtriples(file.getAbsolutePath) +// } +// } +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. +// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def executeMultipleOnQueryEngine(qList: List[PatternQuery]): List[(List[PatternQuery], Map[String, Any])] = { +// // qList.par.map(q => executeOnQueryEngine(q)).toList +// +// val resultFutures = qList.map(q => { +// future { +// executeOnQueryEngine(q) +// } +// }) +// resultFutures.map(Await.result(_, 1000 seconds)) +// +// } +// +// def executeOnQueryEngine(q: PatternQuery): (List[PatternQuery], Map[String, Any]) = { +// val resultFuture = qe.executeQuery(q) +// val result = Await.result(resultFuture, 1000 seconds) +// result +// } +// +// /** +// * Go to JVM JIT steady state by executing the full versions of other queries 10 times. +// */ +// def jitSteadyState { +// for (i <- 1 to 1) { +// val queryList = warmUpQueries //TODO +// executeMultipleOnQueryEngine(queryList) +// qe.awaitIdle +// } +// } +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.gc +// Thread.sleep(100) +// } +// Thread.sleep(10000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// +// val startTime = System.nanoTime +// +// val queryList = evaluatedQueries //TODO are they executed in parallel? 
+// val queryResult = executeMultipleOnQueryEngine(queryList) +// +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// //val timeToFirstResult = roundToMillisecondFraction(queryResult._2("firstResultNanoTime").asInstanceOf[Long] - startTime) +// runResult += s"queryId" -> queryId.toString +// //runResult += s"results" -> queryResult._1.length.toString +// //runResult += s"samplingQuery" -> query.isSamplingQuery.toString +// //runResult += s"tickets" -> query.tickets.toString +// runResult += s"executionTime" -> executionTime.toString +// //runResult += s"timeUntilFirstResult" -> timeToFirstResult.toString +// finalResults = runResult :: finalResults +// } +// +// baseResults += "evaluationDescription" -> description +// val loadingTime = measureTime { +// loadLubm160 +// qe.awaitIdle +// } +// baseResults += "loadingTime" -> loadingTime.toString +// +// jitSteadyState +// cleanGarbage +// runEvaluation(queryId) +// qe.awaitIdle +// qe.shutdown +// finalResults +// } +// +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/TriplerushEval.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/TriplerushEval.scala new file mode 100644 index 0000000..f2792b2 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/TriplerushEval.scala @@ -0,0 +1,256 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ */
+
+package com.signalcollect.triplerush.evaluation
+
+import java.io.File
+import java.util.Date
+import java.util.concurrent.TimeUnit
+import scala.concurrent.Await
+import scala.concurrent.duration.FiniteDuration
+import scala.concurrent.duration._
+import scala.io.Source
+import scala.util.Random
+import com.signalcollect.GraphBuilder
+import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory
+import com.signalcollect.nodeprovisioning.torque.LocalHost
+import com.signalcollect.nodeprovisioning.torque.TorqueHost
+import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter
+import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner
+import com.signalcollect.nodeprovisioning.torque.TorquePriority
+import com.signalcollect.triplerush.Mapping
+import com.signalcollect.triplerush.QueryParticle
+import com.signalcollect.triplerush.QueryEngine
+import com.signalcollect.triplerush.TriplePattern
+import akka.event.Logging
+import com.signalcollect.triplerush.QuerySpecification
+import scala.collection.mutable.UnrolledBuffer
+import java.lang.management.ManagementFactory
+import collection.JavaConversions._
+import language.postfixOps
+import com.signalcollect.triplerush.TripleRush
+import java.lang.management.GarbageCollectorMXBean
+import com.signalcollect.nodeprovisioning.NodeProvisioner
+import com.signalcollect.triplerush.optimizers.Optimizer
+
+trait TriplerushEval {
+
+  def description: String
+  def numberOfNodes: Int
+  def warmupRepetitions: Int
+  def optimizerCreator: TripleRush => Option[Optimizer]
+  def revision: String
+  def torquePriority: String
+
+  import EvalHelpers._
+
+  def evaluationRun: List[Map[String, String]]
+
+  def initializeGraphBuilder: GraphBuilder[Any, Any] = {
+    GraphBuilder.
+ withNodeProvisioner(initializeTorqueNodeProvisioner) + } + + def initializeTorqueNodeProvisioner: NodeProvisioner = { + new TorqueNodeProvisioner( + krakenFromKraken, + numberOfNodes, + allocateWorkersOnCoordinatorNode = true, + copyExecutable = false) + } + + def initializeTr(gb: GraphBuilder[Any, Any]): TripleRush = { + assert(gb != null) + new TripleRush(gb) + } + + def baseStats = Map[String, String]( + ("evaluationDescription", description), + ("numberOfNodes", numberOfNodes.toString), + ("jitRepetitions", warmupRepetitions.toString), + ("java.runtime.version", System.getProperty("java.runtime.version"))) + + def krakenFromKraken = new TorqueHost( + jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), + coresPerNode = 23, + localJarPath = "/home/user/stutz/triplerush-assembly-1.0-SNAPSHOT.jar", jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.7.0/bin/", priority = torquePriority) + + def runEvaluation(query: QuerySpecification, queryDescription: String, optimizer: Option[Optimizer], tr: TripleRush, commonResults: Map[String, String]): Map[String, String] = { + val gcs = ManagementFactory.getGarbageCollectorMXBeans.toList + val compilations = ManagementFactory.getCompilationMXBean + val javaVersion = ManagementFactory.getRuntimeMXBean.getVmVersion + val jvmLibraryPath = ManagementFactory.getRuntimeMXBean.getLibraryPath + val jvmArguments = ManagementFactory.getRuntimeMXBean.getInputArguments + var runResult = commonResults + runResult += (("javaVmVersion", javaVersion)) + runResult += (("jvmLibraryPath", jvmLibraryPath)) + runResult += (("jvmArguments", jvmArguments.mkString(" "))) + val date: Date = new Date + val gcTimeBefore = getGcCollectionTime(gcs) + val gcCountBefore = getGcCollectionCount(gcs) + val compileTimeBefore = compilations.getTotalCompilationTime + runResult += ((s"totalMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) + runResult += ((s"freeMemoryBefore", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) + runResult += ((s"usedMemoryBefore", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) + val startTime = System.nanoTime + val (queryResultFuture, queryStatsFuture) = tr.executeAdvancedQuery(query, optimizer) + val queryResult = Await.result(queryResultFuture, 7200 seconds) + val finishTime = System.nanoTime + val executionTime = roundToMillisecondFraction(finishTime - startTime) + val gcTimeAfter = getGcCollectionTime(gcs) + val gcCountAfter = getGcCollectionCount(gcs) + val gcTimeDuringQuery = gcTimeAfter - gcTimeBefore + val gcCountDuringQuery = gcCountAfter - gcCountBefore + val compileTimeAfter = compilations.getTotalCompilationTime + val compileTimeDuringQuery = compileTimeAfter - compileTimeBefore + val queryStats = Await.result(queryStatsFuture, 10 seconds) + val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) + runResult += ((s"revision", revision)) + runResult += ((s"queryId", queryDescription)) + runResult += ((s"optimizer", optimizer.toString)) + runResult += ((s"queryCopyCount", queryStats("queryCopyCount").toString)) + runResult += ((s"query", queryStats("optimizedQuery").toString)) + runResult += ((s"exception", queryStats("exception").toString)) + runResult += ((s"results", queryResult.size.toString)) + runResult += ((s"executionTime", executionTime.toString)) + runResult += ((s"optimizingTime", optimizingTime.toString)) + runResult += 
((s"totalMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory).toString)) + runResult += ((s"freeMemory", bytesToGigabytes(Runtime.getRuntime.freeMemory).toString)) + runResult += ((s"usedMemory", bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString)) + runResult += ((s"executionHostname", java.net.InetAddress.getLocalHost.getHostName)) + runResult += (("gcTimeAfter", gcTimeAfter.toString)) + runResult += (("gcCountAfter", gcCountAfter.toString)) + runResult += (("gcTimeDuringQuery", gcTimeDuringQuery.toString)) + runResult += (("gcCountDuringQuery", gcCountDuringQuery.toString)) + runResult += (("compileTimeAfter", compileTimeAfter.toString)) + runResult += (("compileTimeDuringQuery", compileTimeDuringQuery.toString)) + runResult += s"date" -> date.toString + runResult + } + +} + +object EvalHelpers { + + def jvmParameters = " -Xmx31000m" + + " -Xms31000m" + + " -XX:+AggressiveOpts" + + " -XX:+AlwaysPreTouch" + + " -XX:+UseNUMA" + + " -XX:-UseBiasedLocking" + + " -XX:MaxInlineSize=1024" + + def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" + def assemblyFile = new File(assemblyPath) + def kraken(torquePriority: String) = new TorqueHost( + jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), + coresPerNode = 23, + localJarPath = assemblyPath, jvmParameters = jvmParameters, jdkBinPath = "/home/user/stutz/jdk1.8.0/bin/", priority = torquePriority) + def localHost = new LocalHost + + def getRevision: String = { + try { + val gitLogPath = ".git/logs/HEAD" + val gitLog = new File(gitLogPath) + val lines = Source.fromFile(gitLogPath).getLines + val lastLine = lines.toList.last + val revision = lastLine.split(" ")(1) + revision + } catch { + case t: Throwable => "Unknown revision." + } + } + + def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 + + def cleanGarbage { + for (i <- 1 to 10) { + System.runFinalization + System.gc + Thread.sleep(10000) + } + Thread.sleep(120000) + } + + /** + * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. + */ + def measureTime(codeBlock: => Unit): Long = { + val startTime = System.currentTimeMillis + codeBlock + val finishTime = System.currentTimeMillis + finishTime - startTime + } + + def roundToMillisecondFraction(nanoseconds: Long): Double = { + ((nanoseconds / 100000.0).round) / 10.0 + } + + /** + * Go to JVM JIT steady state by executing the queries multiple times. + */ + def jitSteadyState(queries: List[QuerySpecification], optimizer: Option[Optimizer], tr: TripleRush, repetitions: Int = 100) { + for (i <- 1 to repetitions) { + println(s"running warmup $i/$repetitions") + for (query <- queries) { + tr.executeQuery(query, optimizer) + tr.awaitIdle + } + } + println(s"warmup finished") + } + + def getGcCollectionTime(gcs: List[GarbageCollectorMXBean]): Long = { + gcs map (_.getCollectionTime) sum + } + + def lastGcId(gcs: List[GarbageCollectorMXBean]): Long = { + val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) + val gcIds = sunGcs. + map(_.getLastGcInfo). + flatMap(info => if (info != null) Some(info.getId) else None) + if (gcIds.isEmpty) 0 else gcIds.max + } + + def freedDuringLastGc(gcs: List[GarbageCollectorMXBean]): Long = { + val sunGcs = gcs map (_.asInstanceOf[com.sun.management.GarbageCollectorMXBean]) + val usedBeforeLastGc = sunGcs. + map(_.getLastGcInfo). + map(_.getMemoryUsageBeforeGc). + flatMap(_.values). 
+      map(_.getUsed). // 'used' rather than 'committed' bytes, to match the val names
+      sum
+    val usedAfterLastGc = sunGcs.
+      map(_.getLastGcInfo).
+      map(_.getMemoryUsageAfterGc).
+      flatMap(_.values).
+      map(_.getUsed).
+      sum
+    val freedDuringLastGc = usedBeforeLastGc - usedAfterLastGc
+    freedDuringLastGc
+  }
+
+  def getGcCollectionCount(gcs: List[GarbageCollectorMXBean]): Long = {
+    gcs map (_.getCollectionCount) sum
+  }
+}
\ No newline at end of file
diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/YagoBenchmark.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/YagoBenchmark.scala
new file mode 100644
index 0000000..6b6a4b0
--- /dev/null
+++ b/src/main/scala/com/signalcollect/triplerush/evaluation/YagoBenchmark.scala
@@ -0,0 +1,341 @@
+///*
+// * @author Philip Stutz
+// * @author Mihaela Verman
+// *
+// * Copyright 2013 University of Zurich
+// *
+// * Licensed under the Apache License, Version 2.0 (the "License");
+// * you may not use this file except in compliance with the License.
+// * You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// *
+// */
+//
+//package com.signalcollect.triplerush.evaluation
+//
+//import java.io.File
+//import java.util.Date
+//import java.util.concurrent.TimeUnit
+//import scala.concurrent.Await
+//import scala.concurrent.duration.FiniteDuration
+//import scala.io.Source
+//import scala.util.Random
+//import com.signalcollect.GraphBuilder
+//import com.signalcollect.factory.messagebus.BulkAkkaMessageBusFactory
+//import com.signalcollect.nodeprovisioning.torque.LocalHost
+//import com.signalcollect.nodeprovisioning.torque.TorqueHost
+//import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter
+//import com.signalcollect.nodeprovisioning.torque.TorqueNodeProvisioner
+//import com.signalcollect.nodeprovisioning.torque.TorquePriority
+//import com.signalcollect.triplerush.Mapping
+//import com.signalcollect.triplerush.QueryParticle
+//import com.signalcollect.triplerush.QueryEngine
+//import com.signalcollect.triplerush.QueryOptimizer
+//import com.signalcollect.triplerush.TriplePattern
+//import akka.event.Logging
+//import com.signalcollect.triplerush.QueryResult
+//
+///**
+// * Runs the dictionary-encoded benchmark queries defined below against a
+// * dataset loaded into TripleRush.
+// *
+// * Evaluation is set to execute on a 'Kraken'-node.
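+// *
+// * Note: the QueryParticles in lubmBenchmarkRun below are already
+// * dictionary-encoded. By the TriplePattern convention, negative ids
+// * (-1, -2, ...) denote query variables, resolved through the particle's
+// * bindings array (hence `bindings = new Array(3)` for three variables),
+// * while positive ids are constants whose mapping is fixed by the base
+// * dictionary (see QueryEncoding / DictionaryEncoderWithBase) used when
+// * the binary splits were encoded.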
+// */
+//object YagoBenchmark extends App {
+//  def jvmHighThroughputGc = " -Xmx64000m" +
+//    " -Xms64000m" +
+//    " -Xmn8000m" +
+//    " -d64" +
+//    " -XX:+UnlockExperimentalVMOptions" +
+//    " -XX:+UseConcMarkSweepGC" +
+//    " -XX:+UseParNewGC" +
+//    " -XX:+CMSIncrementalPacing" +
+//    " -XX:+CMSIncrementalMode" +
+//    " -XX:ParallelGCThreads=20" +
+//    " -XX:ParallelCMSThreads=20" +
+//    " -XX:-PrintCompilation" +
+//    " -XX:-PrintGC" +
+//    " -Dsun.io.serialization.extendedDebugInfo=true" +
+//    " -XX:MaxInlineSize=1024"
+//
+//  def jvmParameters = " -Xmx64000m" +
+//    " -Xms64000m"
+//  def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar"
+//  val assemblyFile = new File(assemblyPath)
+//  //  val jobId = Random.nextInt % 10000
+//  //  def copyName = assemblyPath.replace("-SNAPSHOT", jobId.toString)
+//  //  assemblyFile.renameTo(new File(assemblyPath))
+//  val kraken = new TorqueHost(
+//    jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"),
+//    localJarPath = assemblyPath, jvmParameters = jvmHighThroughputGc, priority = TorquePriority.superfast)
+//  val localHost = new LocalHost
+//  val googleDocs = new GoogleDocsResultHandler(args(0), args(1), "triplerush", "data")
+//
+//  def getRevision: String = {
+//    try {
+//      val gitLogPath = ".git/logs/HEAD"
+//      val gitLog = new File(gitLogPath)
+//      val lines = Source.fromFile(gitLogPath).getLines
+//      val lastLine = lines.toList.last
+//      val revision = lastLine.split(" ")(1)
+//      revision
+//    } catch {
+//      case t: Throwable => "Unknown revision."
+//    }
+//  }
+//
+//  /*********/
+//  def evalName = "Yago eval."
+//  //  def evalName = "Local debugging."
+//  def runs = 1
+//  var evaluation = new Evaluation(evaluationName = evalName, executionHost = kraken).addResultHandler(googleDocs)
+//  //  var evaluation = new Evaluation(evaluationName = evalName, executionHost = localHost).addResultHandler(googleDocs)
+//  /*********/
+//
+//  for (run <- 1 to runs) {
+//    //    for (queryId <- 1 to 1) {
+//    for (optimizer <- List(QueryOptimizer.Clever)) {
+//      //for (tickets <- List(1000, 10000, 100000, 1000000)) {
+//      //evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, true, tickets))
+//      //  evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(evalName, queryId, false, tickets))
+//      // }
+//      evaluation = evaluation.addEvaluationRun(lubmBenchmarkRun(
+//        evalName,
+//        //queryId,
+//        false,
+//        Long.MaxValue,
+//        optimizer,
+//        getRevision))
+//    }
+//    // }
+//  }
+//  evaluation.execute
+//
+//  def lubmBenchmarkRun(
+//    description: String,
+//    //queryId: Int,
+//    sampling: Boolean,
+//    tickets: Long,
+//    optimizer: Int,
+//    revision: String)(): List[Map[String, String]] = {
+//
+//    /**
+//     * Queries from: https://domino.mpi-inf.mpg.de/intranet/ag5/ag5publ.nsf/0/AD3DBAFA6FB90DD2C1257593002FF3DF/$file/rdf3x.pdf
+//     *
+//     * Yago Dataset. We grouped the queries thematically into three groups. The first
+//     * group consists of oriented facts, e.g.: "scientists from Switzerland with a
+//     * doctoral advisor from Germany" (A1). The second group is relationship oriented,
+//     * e.g. "two actors from England playing together in the same movie" (B1). The
+//     * third group examines relationships with unknown predicates, e.g. "two scientists
+//     * related to the same city" (C1).
+//     * A1: select ?gn ?fn where { ?gn ?p. ?fn ?p.
+//     * ?p "scientist"; ?city; ?a. ?a
+//     * ?city2. ?city "Switzerland". ?city2
+//     * "Germany". }
+//     * A2: select ?n where { ?a ?n; "actor"; ?city;
+//     * ?m1; ?m2. ?city ?s. ?s
+//     * "United States". ?m1 "movie"; "Germany". ?m2
+//     * "movie"; "Canada". }
+//     * A3: select distinct ?n ?co where { ?p ?n. { ?p "actor" }
+//     * union { ?p "athlete" } ?p ?c. ?c ?s. ?s
+//     * ?co. ?p ?t. Filter(?t reaches "politician" via )
+//     * }
+//     * B1: select distinct ?n1 ?n2 where { ?a1 ?n1; ?c1;
+//     * ?movie. ?a2 ?n2; ?c2; ?movie. ?c1
+//     * "England". ?c2 "England". Filter (?a1 != ?a2) }
+//     * B2: select ?n1 ?n2 where { ?p1 ?n1; ?city;
+//     * ?p2. ?p2 ?n2; ?city. }
+//     * B3: select distinct ?n1 ?n2 where { ?n1 ?p1. ?n2 ?p2. ?p1 "scientist"; ?award; ?city. ?p2 "scientist"; ?award;
+//     * ?city. Filter (?p1 != ?p2) }
+//     * C1: select distinct ?n1 ?n2 where { ?n1 ?p1. ?n2 ?p2. ?p1 "scientist"; [] ?city. ?p2 "scientist"; [] ?city.
+//     * ?city Filter (?p1 != ?p2) }
+//     * C2: select distinct ?n where { ?p ?n; [] ?c1. [] ?c2. ?c1
+//     * ; "London". ?c2 ; "Paris". }
+//     */
+//    def fullQueries: List[QueryParticle] = List(
+//      QueryParticle(queryId = 1,
+//        unmatched = Array(TriplePattern(-1, 2, 2009), TriplePattern(-1, 18, -2), TriplePattern(-1, 411, -3), TriplePattern(-3, 2, 7), TriplePattern(-3, 9, -2), TriplePattern(-2, 2, 3)),
+//        bindings = new Array(3)),
+//      QueryParticle(2, Array(TriplePattern(-1, 2, 3063), TriplePattern(-1, 4, -2)),
+//        bindings = new Array(2)),
+//      QueryParticle(3, Array(TriplePattern(-1, 18, -2), TriplePattern(-1, 2, 409), TriplePattern(-1, 411, -3), TriplePattern(-3, 9, -2), TriplePattern(-3, 2, 7), TriplePattern(-2, 2, 3)),
+//        bindings = new Array(3)),
+//      QueryParticle(4, Array(TriplePattern(-1, 23, 6), TriplePattern(-1, 2, 11), TriplePattern(-1, 4, -2), TriplePattern(-1, 24, -3), TriplePattern(-1, 26, -4)),
+//        bindings = new Array(4)),
+//      QueryParticle(5, Array(TriplePattern(-1, 9, 6), TriplePattern(-1, 2, 2571)),
+//        bindings = new Array(1)),
+//      QueryParticle(6, Array(TriplePattern(-1, 9, 1), TriplePattern(-1, 2, 7), TriplePattern(-2, 23, -1), TriplePattern(-2, 2, 11)),
+//        bindings = new Array(2)),
+//      QueryParticle(7, Array(TriplePattern(-1, 2, 11), TriplePattern(-1, 13, -2), TriplePattern(-2, 2, 3063), TriplePattern(-3, 426, -1), TriplePattern(-3, 413, -2), TriplePattern(-3, 2, 409)),
+//        bindings = new Array(3)))
+//    val queries = {
+//      require(!sampling && tickets == Long.MaxValue)
+//      fullQueries
+//    }
+//
+//    var baseResults = Map[String, String]()
+//    val qe = new QueryEngine(GraphBuilder.withMessageBusFactory(
+//      new BulkAkkaMessageBusFactory(1024, false)).
+//      withMessageSerialization(false).
+//      withAkkaMessageCompression(true))
+//    //  withLoggingLevel(Logging.DebugLevel).
+//    //  withConsole(true, 8080).
+// // withNodeProvisioner(new TorqueNodeProvisioner( +// // torqueHost = new TorqueHost( +// // jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), +// // localJarPath = assemblyPath, +// // jvmParameters = jvmHighThroughputGc, +// // priority = TorquePriority.fast), +// // numberOfNodes = 10))) +// +// def loadSmallLubm { +// val smallLubmFolderName = "lubm160-filtered-splits" +// for (splitId <- 0 until 2880) { +// qe.loadBinary(s"./$smallLubmFolderName/$splitId.filtered-split", Some(splitId)) +// if (splitId % 288 == 279) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading...") +// } +// } +// println("Query engine preparing query execution.") +// qe.prepareQueryExecution +// println("Query engine ready.") +// } +// +// def loadLargeLubm { +// val largeLubmFolderName = "/home/torque/tmp/lubm10240-filtered-splits" +// for (splitId <- 0 until 2880) { +// qe.loadBinary(s"$largeLubmFolderName/$splitId.filtered-split", Some(splitId)) +// if (splitId % 288 == 279) { +// println(s"Dispatched up to split #$splitId/2880, awaiting idle.") +// qe.awaitIdle +// println(s"Continuing graph loading..") +// } +// } +// println("Query engine preparing query execution") +// qe.prepareQueryExecution +// } +// +// /** +// * Returns the time in milliseconds it takes to execute the code in 'codeBlock'. +// */ +// def measureTime(codeBlock: => Unit): Long = { +// val startTime = System.currentTimeMillis +// codeBlock +// val finishTime = System.currentTimeMillis +// finishTime - startTime +// } +// +// def roundToMillisecondFraction(nanoseconds: Long): Double = { +// ((nanoseconds / 100000.0).round) / 10.0 +// } +// +// def executeOnQueryEngine(q: QueryParticle): QueryResult = { +// val resultFuture = qe.executeQuery(q, optimizer) +// try { +// Await.result(resultFuture, new FiniteDuration(1000, TimeUnit.SECONDS)) // TODO handle exception +// } catch { +// case t: Throwable => +// println(s"Query $q timed out!") +// QueryResult(List(), Array("exception"), Array(t)) +// } +// } +// +// /** +// * Go to JVM JIT steady state by executing the query 100 times. 
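+//     * (The loop below performs 5 passes over the 7 queries, i.e. 35
+//     * warm-up executions, rather than literally 100.)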
+// */ +// def jitSteadyState { +// for (i <- 1 to 5) { +// for (queryId <- 1 to 7) { +// val queryIndex = queryId - 1 +// val query = fullQueries(queryIndex) +// print(s"Warming up with query $query ...") +// executeOnQueryEngine(query) +// qe.awaitIdle +// println(s" Done.") +// } +// } +// } +// +// def cleanGarbage { +// for (i <- 1 to 10) { +// System.gc +// Thread.sleep(100) +// } +// Thread.sleep(10000) +// } +// +// var finalResults = List[Map[String, String]]() +// def runEvaluation(queryId: Int) { +// var runResult = baseResults +// var date: Date = new Date +// val queryIndex = queryId - 1 +// val query = queries(queryIndex) +// val startTime = System.nanoTime +// val queryResult = executeOnQueryEngine(query) +// val queryStats: Map[Any, Any] = (queryResult.statKeys zip queryResult.statVariables).toMap.withDefaultValue("") +// val finishTime = System.nanoTime +// val executionTime = roundToMillisecondFraction(finishTime - startTime) +// val timeToFirstResult = roundToMillisecondFraction(queryStats("firstResultNanoTime").asInstanceOf[Long] - startTime) +// val optimizingTime = roundToMillisecondFraction(queryStats("optimizingDuration").asInstanceOf[Long]) +// runResult += s"revision" -> revision +// runResult += s"queryId" -> queryId.toString +// runResult += s"optimizer" -> optimizer.toString +// runResult += s"queryCopyCount" -> queryStats("queryCopyCount").toString +// runResult += s"query" -> queryStats("optimizedQuery").toString +// runResult += s"exception" -> queryStats("exception").toString +// runResult += s"results" -> queryResult.queries.length.toString +// runResult += s"samplingQuery" -> query.isSamplingQuery.toString +// runResult += s"tickets" -> query.tickets.toString +// runResult += s"executionTime" -> executionTime.toString +// runResult += s"timeUntilFirstResult" -> timeToFirstResult.toString +// runResult += s"optimizingTime" -> optimizingTime.toString +// runResult += s"totalMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory).toString +// runResult += s"freeMemory" -> bytesToGigabytes(Runtime.getRuntime.freeMemory).toString +// runResult += s"usedMemory" -> bytesToGigabytes(Runtime.getRuntime.totalMemory - Runtime.getRuntime.freeMemory).toString +// runResult += s"executionHostname" -> java.net.InetAddress.getLocalHost.getHostName +// runResult += s"loadNumber" -> 160.toString +// runResult += s"date" -> date.toString +// finalResults = runResult :: finalResults +// } +// +// def bytesToGigabytes(bytes: Long): Double = ((bytes / 1073741824.0) * 10.0).round / 10.0 +// +// baseResults += "evaluationDescription" -> description +// val loadingTime = measureTime { +// println("Dispatching loading command to worker...") +// loadSmallLubm +// //loadLargeLubm +// qe.awaitIdle +// } +// baseResults += "loadingTime" -> loadingTime.toString +// +// println("Starting warm-up...") +// jitSteadyState +// //cleanGarbage +// println(s"Finished warm-up.") +// for (queryId <- 1 to 7) { +// println(s"Running evaluation for query $queryId.") +// runEvaluation(queryId) +// println(s"Done running evaluation for query $queryId. 
Awaiting idle") +// qe.awaitIdle +// println("Idle") +// } +// +// qe.shutdown +// finalResults +// } +// +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/YagoUseCase.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/YagoUseCase.scala new file mode 100644 index 0000000..c2fe889 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/YagoUseCase.scala @@ -0,0 +1,210 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import scala.concurrent.ExecutionContext.Implicits.global +//import com.signalcollect.triplerush.evaluation.SparqlDsl._ +//import java.io.FileOutputStream +//import com.signalcollect.triplerush.Mapping +//import com.signalcollect.triplerush.QueryEngine +//import scala.concurrent.Await +//import scala.concurrent.duration.FiniteDuration +//import java.util.concurrent.TimeUnit +//import com.signalcollect.triplerush.PatternQuery +// +//object YagoUseCase extends App { +// val output = new FileOutputStream("results.txt") +// val qe = new QueryEngine +// +// val yago = "http://yago-knowledge.org/resource" +// val rdf = "http://www.w3.org/2000/01/rdf-schema" +// val owl = "http://www.w3.org/2002/07/owl" +// +// Mapping.setAbbreviations(Map( +// yago -> "yago:", +// rdf -> "rdf:", +// owl -> "owl:")) +// +// //load("./yago/yagoSchema.nt") +// load("./yago/yagoTaxonomy.nt") +// load("./yago/yagoTypes.nt", onlyKnown = true) +// load("./yago/yagoFacts.nt", onlyKnown = true) +// +// def load(f: String, onlyKnown: Boolean = false) { +// qe.load(f, bidirectionalPredicates = false, onlyKnown, Set( +// s"$owl#disjointWith", +// s"$yago/hasGender", +// // s"$rdf#type", +// // s"$rdf#subClassOf", +// s"$yago/hasWebsite")) +// qe.awaitIdle +// } +// +// // val q = SAMPLE(100000) ? "o1" ? "o2" ? "p1" ? "p2" WHERE ( //SAMPLE(10000000) +// // | - s"$yago/wordnet_president_110467179" - "p1" - "o1", +// // | - "o1" - "p2" - "o2") +// +// val queries: Map[Int, (String, Int) => PatternQuery] = Map( +// 1 -> { +// case (entityName: String, tickets: Int) => SAMPLE(tickets) ? "o1" ? "p1" WHERE ( +// | - s"$yago/$entityName" - "p1" - "o1") +// }, +// 3 -> { +// case (entityName: String, tickets: Int) => SAMPLE(tickets) ? "o1" ? "o2" ? "o3" ? "p1" ? "p2" ? "p3" WHERE ( +// | - s"$yago/$entityName" - "p1" - "o1", +// | - "o1" - "p2" - "o2", +// | - "o2" - "p3" - "o3") +// }, +// 5 -> { +// case (entityName: String, tickets: Int) => SAMPLE(tickets) ? "o1" ? "o2" ? "o3" ? "o4" ? "o5" ? "p1" ? "p2" ? "p3" ? "p4" ? "p5" WHERE ( +// | - s"$yago/$entityName" - "p1" - "o1", +// | - "o1" - "p2" - "o2", +// | - "o2" - "p3" - "o3", +// | - "o3" - "p4" - "o4", +// | - "o4" - "p5" - "o5") +// }, +// 7 -> { +// case (entityName: String, tickets: Int) => SAMPLE(tickets) ? "o1" ? "o2" ? "o3" ? "o4" ? "o5" ? "o6" ? "o7" ? "p1" ? 
"p2" ? "p3" ? "p4" ? "p5" ? "p6" ? "p7" WHERE ( +// | - s"$yago/$entityName" - "p1" - "o1", +// | - "o1" - "p2" - "o2", +// | - "o2" - "p3" - "o3", +// | - "o3" - "p4" - "o4", +// | - "o4" - "p5" - "o5", +// | - "o5" - "p6" - "o6", +// | - "o6" - "p7" - "o7") +// }, +// 9 -> { +// case (entityName: String, tickets: Int) => SAMPLE(tickets) ? "o1" ? "o2" ? "o3" ? "o4" ? "o5" ? "o6" ? "o7" ? "o8" ? "o9" ? "p1" ? "p2" ? "p3" ? "p4" ? "p5" ? "p6" ? "p7" ? "p8" ? "p9" WHERE ( +// | - s"$yago/$entityName" - "p1" - "o1", +// | - "o1" - "p2" - "o2", +// | - "o2" - "p3" - "o3", +// | - "o3" - "p4" - "o4", +// | - "o4" - "p5" - "o5", +// | - "o5" - "p6" - "o6", +// | - "o6" - "p7" - "o7", +// | - "o7" - "p8" - "o8", +// | - "o8" - "p9" - "o9") +// }) +// +// var entityName = "" +// +// while (entityName != "exit") { +// println("Please enter an entity name:") +// entityName = readLine +// +// if (entityName != "exit") { +// println("Please enter the sample size:") +// val sampleSize = try { +// val read = readLine.toInt +// if (read >= 1) { +// read +// } else { +// println("Invalid input, using 1000.") +// 1000 +// } +// } catch { +// case whatever: Throwable => +// println("Invalid input, using 1000.") +// 1000 +// } +// println("Please enter the path length:") +// val pathLength = try { +// val read = readLine.toInt +// if (queries.keySet.contains(read)) { +// read +// } else { +// println("Invalid input, using 1.") +// 1 +// } +// } catch { +// case whatever: Throwable => +// println("Invalid input, using 1.") +// 1 +// } +// println("Please enter the number of bindings per variable:") +// val topKBindings = try { +// val read = readLine.toInt +// if (read >= 1) { +// read +// } else { +// println("Invalid input, using 10.") +// 10 +// } +// } catch { +// case whatever: Throwable => +// println("Invalid input, using 10.") +// 10 +// } +// +// println("Executing query ...") +// +// val resultFuture = qe.executeQuery(queries(pathLength)(entityName, sampleSize)) +// // result onSuccess { +// // case results => +// // println("Result bindings:") +// // results foreach { result => +// // println("\t" + result.bindings + " tickets = " + result.tickets) +// // } +// // } +// val result = Await.result(resultFuture, new FiniteDuration(1000, TimeUnit.SECONDS)) +// +// println("Analyzing results ...") +// +// result match { +// case (patterns, metadata) => +// // variable binding #paths +// var bindingsStats = Map[Int, Map[Int, Long]]().withDefaultValue(Map[Int, Long]().withDefaultValue(0l)) +// output(s"Total # of result bindings ${patterns.length}\n") +// for (pattern <- patterns) { +// for (binding <- pattern.bindings.map) { +// val variableId = binding._1 +// val valueId = binding._2 +// var currentStatsForVariable: Map[Int, Long] = bindingsStats(variableId) +// var numberOfTicketsForValue = currentStatsForVariable(valueId) +// numberOfTicketsForValue += pattern.tickets +// currentStatsForVariable = currentStatsForVariable.updated(valueId, numberOfTicketsForValue) +// bindingsStats = bindingsStats.updated(variableId, currentStatsForVariable) +// } +// } +// for (variable <- bindingsStats.keys) { +// val variableString = Mapping.getString(variable) +// output(s"Stats for variable $variableString:\n") +// val valueMap = bindingsStats(variable) +// val totalTickets = valueMap.values.sum +// for (value <- valueMap.toSeq.sortBy(_._2)(Ordering[Long].reverse).map(_._1).take(topKBindings)) { +// val valueString = Mapping.getString(value) +// val ticketsForValue = valueMap(value) +// output(s"\t$valueString: 
${ticketsForValue.toDouble / totalTickets.toDouble}\n") +// } +// } +// } +// } +// qe.awaitIdle +// println("The query is done.") +// } +// qe.shutdown +// output.flush +// output.close +// def output(msg: String) { +// print(msg) +// output.write(msg.getBytes) +// } +//} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/AnnotationFilter.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/AnnotationFilter.scala new file mode 100644 index 0000000..570a1a3 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/AnnotationFilter.scala @@ -0,0 +1,46 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation.lubm +import scala.sys.process._ +import scala.io.Source +import java.io.FileOutputStream +import java.io.OutputStreamWriter + +/** + * Removes annotations from ntriple files. + */ +object AnnotationFilter extends KrakenExecutable { + run(Filter.filter(args(0)) _) +} + +object Filter { + def filter(fileName: String)() { + import FileOperations._ + val ntriplesLines = Source.fromFile(fileName).getLines + val filtered = new FileOutputStream(fileName + "-filtered") + val writer = new OutputStreamWriter(filtered, "UTF8") + for (line <- ntriplesLines) { + writer.write(line.replace("^^", "") + "\n") + } + writer.close + filtered.close + } +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoder.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoder.scala new file mode 100644 index 0000000..428738a --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoder.scala @@ -0,0 +1,110 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.signalcollect.triplerush.evaluation.lubm + +import java.io.DataOutputStream +import java.io.File +import java.io.FileInputStream +import java.io.FileOutputStream +import java.util.HashMap +import collection.JavaConversions._ +import org.semanticweb.yars.nx.parser.NxParser +import java.io.OutputStreamWriter +import FileOperations.createFolder +import FileOperations.filesIn + +object DictionaryEncoder extends KrakenExecutable with Serializable { + run(Encoder.encode(args(0)) _) +} + +object Encoder { + def encode(sourceFolderBaseName: String)() { + import FileOperations._ + + val sourceFolderName = s"./${sourceFolderBaseName}-nt" + val targetFolderName = sourceFolderName.replace("nt", "binary") + createFolder(targetFolderName) + val source = new File(sourceFolderName) + val target = new File(targetFolderName) + var nextId = 1 + val dictionaryPath = s"$targetFolderName/dictionary.txt" + val dictionary = new HashMap[String, Int]() + val ub = "http://swat.cse.lehigh.edu/onto/univ-bench.owl" + val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns" + + def register(entry: String): Int = { + val id = dictionary.get(entry) + if (id != 0) { + id + } else { + val idForEntry = nextId + dictionary.put(entry, idForEntry) + nextId += 1 + idForEntry + } + } + + def encodeFile(source: File, target: File) { + val is = new FileInputStream(source) + val binaryOs = new FileOutputStream(target) + val binaryDos = new DataOutputStream(binaryOs) + val nxp = new NxParser(is) + while (nxp.hasNext) { + val triple = nxp.next + val subjectString = triple(0).toString + val predicateString = triple(1).toString + val objectString = triple(2).toString + val sId = register(subjectString) + val pId = register(predicateString) + val oId = register(objectString) + binaryDos.writeInt(sId) + binaryDos.writeInt(pId) + binaryDos.writeInt(oId) + } + binaryDos.close + binaryOs.close + is.close + } + println("Encoding files ...") + + val sourceFiles = filesIn(sourceFolderName). + filter(_.getName.endsWith(".nt")). + sorted + + for (src <- sourceFiles) { + val trg = new File(targetFolderName + "/" + src.getName.replace(".nt", ".binary")) + println(s"Encoding file $src.") + encodeFile(src, trg) + } + + println(s"${sourceFiles.length} files have been encoded, ${nextId} unique ids.") + + println("Writing dictionary.") + val dictionaryOs = new FileOutputStream(dictionaryPath) + val writer = new OutputStreamWriter(dictionaryOs, "UTF8") + for (entry <- dictionary) { + writer.write(s"${entry._1} -> ${entry._2}\n") + } + writer.close + dictionaryOs.close + + } +} diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithBase.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithBase.scala new file mode 100644 index 0000000..1981c32 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithBase.scala @@ -0,0 +1,129 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation.lubm + +import java.io.DataOutputStream +import java.io.File +import java.io.FileInputStream +import java.io.FileOutputStream +import java.util.HashMap +import collection.JavaConversions._ +import scala.io.Source +import org.semanticweb.yars.nx.parser.NxParser +import java.io.OutputStreamWriter +import com.signalcollect.triplerush.evaluation.QueryEncoding + +object DictionaryEncoderWithBase extends KrakenExecutable with Serializable { +// override def executionHost = local + run(BaseEncoder.encode(args(0)) _) +} + +case object BaseEncoder extends Serializable { + def encode(sourceFolderBaseName: String)() { + import FileOperations._ + + val sourceFolderName = s"./${sourceFolderBaseName}-nt" + val targetFolderName = sourceFolderName.replace("nt", "binary") + createFolder(targetFolderName) + val source = new File(sourceFolderName) + val target = new File(targetFolderName) + val dictionary: HashMap[String, Int] = new HashMap[String, Int]() + for (entry <- QueryEncoding.m) { + dictionary.put(entry._1, entry._2) + } + var nextId = dictionary.values.max + 1 + val dictionaryPath = s"$targetFolderName/dictionary.txt" + + def register(entry: String): Int = { + val id = dictionary.get(entry) + if (id != 0) { + id + } else { + val idForEntry = nextId + dictionary.put(entry, idForEntry) + nextId += 1 + idForEntry + } + } + + def encodeFile(source: File, target: File) { + val is = new FileInputStream(source) + val binaryOs = new FileOutputStream(target) + val binaryDos = new DataOutputStream(binaryOs) + val nxp = new NxParser(is) + while (nxp.hasNext) { + val triple = nxp.next + val subjectString = triple(0).toString + if (!subjectString.startsWith("file:///Users")) { + val predicateString = triple(1).toString + val objectString = triple(2).toString + val sId = register(subjectString) + val pId = register(predicateString) + val oId = register(objectString) + binaryDos.writeInt(sId) + binaryDos.writeInt(pId) + binaryDos.writeInt(oId) + } + } + binaryDos.close + binaryOs.close + is.close + } + + // println("Reading base dictionary ...") + // val dictionaryFile = Source.fromFile(baseDictionaryPath) + // for (line <- dictionaryFile.getLines) { + // val entry = line.split(" ") + // if (entry.length == 3) { + // val id = entry(2).toInt + // nextId = math.max(nextId, id + 1) + // dictionary.put(entry(0), id) + // println("Added entry " + entry(0)) + // } else if (entry.length != 0) { + // throw new Exception(s"Failed to parse line $line, was parsed to ${entry.toList}.") + // } + // } + + println("Encoding files ...") + + val sourceFiles = filesIn(sourceFolderName). + filter(_.getName.endsWith(".nt")). 
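+      // sorting fixes the traversal order, which keeps the assignment of
+      // dictionary ids deterministic across runs over the same input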
+ sorted + + for (src <- sourceFiles) { + val trg = new File(targetFolderName + "/" + src.getName.replace(".nt", ".binary")) + println(s"Encoding file $src.") + encodeFile(src, trg) + } + + println(s"${sourceFiles.length} files have been encoded, dictionary contains ${nextId} entries.") + + println("Writing dictionary.") + val dictionaryOs = new FileOutputStream(dictionaryPath) + val writer = new OutputStreamWriter(dictionaryOs, "UTF8") + for (entry <- dictionary) { + writer.write(s"${entry._1} -> ${entry._2}\n") + } + writer.close + dictionaryOs.close + + } +} diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithPartitioning.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithPartitioning.scala new file mode 100644 index 0000000..8081193 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DictionaryEncoderWithPartitioning.scala @@ -0,0 +1,115 @@ +///* +// * @author Philip Stutz +// * @author Mihaela Verman +// * +// * Copyright 2013 University of Zurich +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// * +// */ +// +//package com.signalcollect.triplerush.evaluation +// +//import java.io.DataOutputStream +//import java.io.File +//import java.io.FileInputStream +//import java.io.FileOutputStream +//import java.util.HashMap +//import collection.JavaConversions._ +//import org.semanticweb.yars.nx.parser.NxParser +//import java.io.OutputStreamWriter +//import com.signalcollect.triplerush.evaluation.lubm.FileOperations +// +//object DictionaryEncoderWithPartitioning extends KrakenExecutable with Serializable { +// runOnKraken(PartitioningEncoder.encode(args(0)) _) +//} +// +//object PartitioningEncoder { +// def encode(sourceFolderBaseName: String)() { +// import FileOperations._ +// +// val sourceFolderName = s"./${sourceFolderBaseName}-nt" +// val targetFolderName = sourceFolderName.replace("nt", "binary") +// createFolder(targetFolderName) +// val source = new File(sourceFolderName) +// val target = new File(targetFolderName) +// var nextId = 0 +// var universityId = 0 +// val dictionaryPath = s"$targetFolderName/dictionary.txt" +// val dictionary = new HashMap[String, Int]() +// val ub = "http://swat.cse.lehigh.edu/onto/univ-bench.owl" +// val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns" +// +// def register(entry: String, university: Int): Int = { +// val id = dictionary.get(entry) +// if (id != 0) { +// id +// } else { +// val idForEntry = nextId | (university << 23) +// dictionary.put(entry, idForEntry) +// nextId += 1 +// assert(nextId < 8000000) +// idForEntry +// } +// } +// +// def encodeFile(source: File, target: File) { +// universityId = source.getName.split("University")(1).split("_")(0).toInt +// assert(universityId < 256) +// val is = new FileInputStream(source) +// val binaryOs = new FileOutputStream(target) +// val binaryDos = new DataOutputStream(binaryOs) +// val nxp = new NxParser(is) +// while (nxp.hasNext) { +// val triple = 
nxp.next +// val subjectString = triple(0).toString +// if (!subjectString.startsWith("file:///Users")) { +// val predicateString = triple(1).toString +// val objectString = triple(2).toString +// val sId = register(subjectString, universityId) +// val pId = register(predicateString, universityId) +// val oId = register(objectString, universityId) +// binaryDos.writeInt(sId) +// binaryDos.writeInt(pId) +// binaryDos.writeInt(oId) +// } +// } +// binaryDos.close +// binaryOs.close +// is.close +// } +// println("Encoding files ...") +// +// val sourceFiles = filesIn(sourceFolderName). +// filter(_.getName.endsWith(".nt")). +// sorted +// +// for (src <- sourceFiles) { +// val trg = new File(targetFolderName + "/" + src.getName.replace(".nt", ".binary")) +// println(s"Encoding file $src.") +// encodeFile(src, trg) +// } +// +// println(s"${sourceFiles.length} files have been encoded, ${nextId + 1} unique ids.") +// +// println("Writing dictionary.") +// val dictionaryOs = new FileOutputStream(dictionaryPath) +// val writer = new OutputStreamWriter(dictionaryOs, "UTF8") +// for (entry <- dictionary) { +// writer.write(s"${entry._1} -> ${entry._2}\n") +// } +// writer.close +// dictionaryOs.close +// +// } +//} diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DuplicateFilter.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DuplicateFilter.scala new file mode 100644 index 0000000..0f9d1af --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/DuplicateFilter.scala @@ -0,0 +1,73 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.signalcollect.triplerush.evaluation.lubm + +import java.io.File +import java.io.FileInputStream +import com.signalcollect.triplerush.TriplePattern +import java.io.FileOutputStream +import java.io.DataOutputStream +import collection.JavaConversions._ +import java.io.DataInputStream +import java.io.EOFException +import scala.collection.mutable.HashSet + +object DuplicateFilter extends FileTransformation with Serializable { + + override def executionHost = kraken + def nameOfTransformation = "filtered" + def sourceFolder = s"${args(0)}-binary-splits" + override def destinationFolder = sourceFolder.replace("binary", nameOfTransformation) + override def shouldTransformFile(f: File) = f.getName.endsWith(".split") + override def extensionTransformer(fileName: String) = fileName.replace(".split", ".filtered-split") + override def transform(sourceFile: File, targetFile: File) { + val is = new FileInputStream(sourceFile) + val dis = new DataInputStream(is) + val tripleSet = HashSet[TriplePattern]() + try { + while (true) { + val sId = dis.readInt + val pId = dis.readInt + val oId = dis.readInt + val pattern = TriplePattern(sId, pId, oId) + assert(pattern.isFullyBound) + tripleSet.add(pattern) + } + } catch { + case done: EOFException => + dis.close + is.close + case t: Throwable => + throw t + } + val splitFileOutputStream = new FileOutputStream(targetFile) + val splitDataOutputStream = new DataOutputStream(splitFileOutputStream) + for (triplePattern <- tripleSet) { + splitDataOutputStream.writeInt(triplePattern.s) + splitDataOutputStream.writeInt(triplePattern.p) + splitDataOutputStream.writeInt(triplePattern.o) + } + splitDataOutputStream.close + splitFileOutputStream.close + println(tripleSet.size) + } + +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileSplitter.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileSplitter.scala new file mode 100644 index 0000000..c72bce7 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileSplitter.scala @@ -0,0 +1,135 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.signalcollect.triplerush.evaluation.lubm +import java.io.FileInputStream +import com.signalcollect.triplerush.TriplePattern +import java.io.FileOutputStream +import java.io.DataOutputStream +import collection.JavaConversions._ +import java.io.DataInputStream +import java.io.EOFException +import com.signalcollect.nodeprovisioning.torque.TorqueHost +import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +import com.signalcollect.nodeprovisioning.torque.TorquePriority +import FileOperations.createFolder +import FileOperations.filesIn +import com.signalcollect.triplerush.evaluation.Evaluation +import scala.Array.canBuildFrom +import com.signalcollect.triplerush.TripleMapper +import com.signalcollect.nodeprovisioning.torque.LocalHost + +object FileSplitter extends App { + + def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" + val kraken = new TorqueHost( + jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), + coresPerNode = 1, + localJarPath = assemblyPath, priority = TorquePriority.fast) + val local = new LocalHost + var evaluation = new Evaluation( + executionHost = kraken) + + for (modulo <- 0 until 4) { // Has to match parallelism variable inside splitting function. + evaluation = evaluation.addEvaluationRun(splitNtriples(modulo, args(0))) + } + + evaluation.execute + + def splitNtriples(mod: Int, baseSourceFolderName: String)(): List[Map[String, String]] = { + import FileOperations._ + println("Modulo is: " + mod) + val splits = 2880 + val mapper = new TripleMapper[Any](numberOfNodes = splits, workersPerNode = 1) + val parallelism = 4 + val sourceFolder = s"./$baseSourceFolderName-binary" + val destinationFolder = sourceFolder + "-splits" + createFolder(destinationFolder) + + val fileOutputStreams = { + (0 until splits).toArray map { splitId => + if (mod == splitId % parallelism) { + val binaryOs = new FileOutputStream(s"$destinationFolder/$splitId.split") + binaryOs + } else { + null.asInstanceOf[FileOutputStream] + } + } + } + val dataOutputStreams = fileOutputStreams map { fileOutputStream => + if (fileOutputStream != null) { + val binaryDos = new DataOutputStream(fileOutputStream) + binaryDos + } else { + null.asInstanceOf[DataOutputStream] + } + } + val binaryExtension = ".binary" + val initialTime = System.currentTimeMillis + var totalTriplesSplit = 0 + val files = filesIn(sourceFolder).filter(_.getName.endsWith("binary")) + val fileCount = files.length + var filesProcessed = 0 + + for (file <- files) { + filesProcessed += 1 + print(s"Processing file ${filesProcessed}/$fileCount ...") + val triplesSplit = splitFile(file.getAbsolutePath) + totalTriplesSplit += triplesSplit + println(s"Triples processed so far: $totalTriplesSplit") + } + dataOutputStreams.foreach { s => if (s != null) s.close } + fileOutputStreams.foreach { s => if (s != null) s.close } + + def splitFile(path: String): Int = { + val is = new FileInputStream(path) + val dis = new DataInputStream(is) + var triplesSplit = 0 + try { + while (true) { + val sId = dis.readInt + val pId = dis.readInt + val oId = dis.readInt + val pattern = TriplePattern(sId, pId, oId) + if (!pattern.isFullyBound) { + println(s"Problem: $pattern, triple #${triplesSplit + 1} in file $path is not fully bound.") + } else { + val patternSplit = mapper.getWorkerIdForVertexId(pattern) + if (mod == patternSplit % parallelism) { + val splitStream = dataOutputStreams(patternSplit) + splitStream.writeInt(sId) + 
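// writeInt emits 4-byte big-endian Ints, the layout the DataInputStream.readInt
+                  // calls in this package's split readers (e.g. DuplicateFilter) expect
+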
splitStream.writeInt(pId) + splitStream.writeInt(oId) + triplesSplit += 1 + } + } + } + } catch { + case done: EOFException => + dis.close + is.close + case t: Throwable => + throw t + } + triplesSplit + } + List() + } +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileTransformation.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileTransformation.scala new file mode 100644 index 0000000..66f1d02 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/FileTransformation.scala @@ -0,0 +1,94 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation.lubm + +import java.io.File +import collection.JavaConversions._ +import java.nio.file.Paths +import java.nio.file.Files + +abstract class FileTransformation extends KrakenExecutable { + import FileOperations._ + + def parallel = false + def nameOfTransformation: String + def sourceFolder: String + def destinationFolder = sourceFolder + "-" + nameOfTransformation + def extensionTransformer(fileName: String): String + + def shouldTransformFile(f: File) = true + + def remoteRun(srcFolder: String, dstFolder: String)() { + + def fileInDestinationFolder(fileName: String) = dstFolder + "/" + extensionTransformer(fileName) + lazy val target = new File(dstFolder) + + if (!target.exists) { + target.mkdir + } + + val files = filesIn(srcFolder).filter(shouldTransformFile) + println("Starting file transformations...") + if (parallel) { + for (file <- files.par) { + callTransform(file) + } + } else { + for (file <- files) { + callTransform(file) + } + } + println("All files have been transformed.") + + def callTransform(file: File) { + println(s"Transforming file ${file.getName}") + transform(file, new File(fileInDestinationFolder(file.getName))) + } + + } + + run(remoteRun(sourceFolder, destinationFolder) _) + def transform(sourceFile: File, targetFile: File) + +} + +object FileOperations { + def filesIn(folderPath: String): Array[File] = { + val folder = new File(folderPath) + folder.listFiles + } + + def move(f: File, folder: File) { + if (f.exists && folder.exists && folder.isDirectory) { + val source = Paths.get(f.getAbsolutePath) + val target = Paths.get(new File(folder.getAbsolutePath + "/" + f.getName).getAbsolutePath) + Files.move(source, target) + } + } + + def createFolder(path: String): File = { + val f = new File(path) + if (!f.exists) { + f.mkdir + } + f + } +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/KrakenExecutable.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/KrakenExecutable.scala new file mode 100644 index 0000000..8aee7d3 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/KrakenExecutable.scala @@ -0,0 +1,59 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + 
* Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation.lubm +import collection.JavaConversions._ +import com.signalcollect.nodeprovisioning.torque.TorqueHost +import com.signalcollect.nodeprovisioning.torque.TorqueJobSubmitter +import com.signalcollect.nodeprovisioning.torque.TorquePriority +import com.signalcollect.triplerush.evaluation.Evaluation +import com.signalcollect.nodeprovisioning.torque.LocalHost +import com.signalcollect.nodeprovisioning.torque.ExecutionHost + +trait KrakenExecutable extends App { + def assemblyPath = "./target/scala-2.10/triplerush-assembly-1.0-SNAPSHOT.jar" + val kraken = new TorqueHost( + jobSubmitter = new TorqueJobSubmitter(username = System.getProperty("user.name"), hostname = "kraken.ifi.uzh.ch"), + coresPerNode = 1, + localJarPath = assemblyPath, priority = TorquePriority.fast) + val local = new LocalHost + def executionHost: ExecutionHost = kraken + var evaluation = new Evaluation( + executionHost = executionHost) + + def run(f: () => Unit) { + evaluation = evaluation.addEvaluationRun(Wrapper.wrapFunctionToReturnEmptyList(f)) + evaluation.execute + } +} + +/** + * On separate object to circumvent serialization issues. + */ +object Wrapper { + def wrapFunctionToReturnEmptyList(f: () => Unit) = { + def wrappedF: List[Map[String, String]] = { + f() + List() + } + wrappedF _ + } + +} \ No newline at end of file diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/LubmGenerator.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/LubmGenerator.scala new file mode 100644 index 0000000..901f403 --- /dev/null +++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/LubmGenerator.scala @@ -0,0 +1,46 @@ +/* + * @author Philip Stutz + * @author Mihaela Verman + * + * Copyright 2013 University of Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.signalcollect.triplerush.evaluation.lubm +import scala.sys.process._ +import java.io.File + +object LubmGenerator extends KrakenExecutable { + run(Generator.generate(args(0).toInt) _) +} + +object Generator { + def generate (universities: Int)() { + import FileOperations._ + + // Generate raw LUBM files. + s"java -cp uba.jar edu.lehigh.swat.bench.uba.Generator -univ $universities -onto http://swat.cse.lehigh.edu/onto/univ-bench.owl" !! (ProcessLogger(println(_))) + + // Create new directory and move files there. 
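+    // (the generator writes its University*_*.owl files and log.txt into the
+    // current working directory, which is why they are collected from "./")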
+    val targetFolder = createFolder(s"./lubm$universities")
+    println("Moving OWL files ...")
+    for (owlFile <- filesIn("./").filter(_.getName.endsWith(".owl"))) {
+      println(s"Moving file ${owlFile.getName} ...")
+      move(owlFile, targetFolder)
+    }
+    move(new File("./log.txt"), targetFolder)
+    println("All LUBM files have been moved.")
+  }
+}
\ No newline at end of file
diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/NtriplesConverter.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/NtriplesConverter.scala
new file mode 100644
index 0000000..64ae777
--- /dev/null
+++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/NtriplesConverter.scala
@@ -0,0 +1,40 @@
+/*
+ * @author Philip Stutz
+ * @author Mihaela Verman
+ *
+ * Copyright 2013 University of Zurich
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package com.signalcollect.triplerush.evaluation.lubm
+
+import java.io.File
+import scala.sys.process._
+
+object NtriplesConverter extends FileTransformation with Serializable {
+
+  def nameOfTransformation = "nt"
+  def sourceFolder = s"./${args(0)}"
+
+  override def shouldTransformFile(f: File) = f.getName.endsWith(".owl")
+  override def extensionTransformer(fileName: String) = fileName.replace(".owl", ".nt")
+  // Convert an OWL file to N-Triples format with rapper.
+  def transform(sourceFile: File, targetFile: File) {
+    if (!targetFile.exists) {
+      Seq("/usr/bin/rapper", sourceFile.getAbsolutePath, "-e", "-o", "ntriples") #>> targetFile !! (ProcessLogger(println(_)))
+    }
+  }
+
+}
diff --git a/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/TripleCounter.scala b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/TripleCounter.scala
new file mode 100644
index 0000000..d55e47a
--- /dev/null
+++ b/src/main/scala/com/signalcollect/triplerush/evaluation/lubm/TripleCounter.scala
@@ -0,0 +1,74 @@
+/*
+ * @author Philip Stutz
+ * @author Mihaela Verman
+ *
+ * Copyright 2013 University of Zurich
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package com.signalcollect.triplerush.evaluation.lubm
+import java.io.File
+import java.io.FileInputStream
+import java.io.EOFException
+import java.io.DataInputStream
+
+object TripleCounter extends App {
+  val toCountFolderName = args(0)
+  val splits = 2880
+  val folder = new File(toCountFolderName)
+  val initialTime = System.currentTimeMillis
+  var triplesCounted = 0
+  for (split <- 0 until splits) {
+    val startTime = System.currentTimeMillis
+    print(s"Processing split $split/$splits ...")
+    val triplesInSplit = countSplit(split)
+    triplesCounted += triplesInSplit
+    println(" Done.")
+    val endTime = System.currentTimeMillis
+    val splitProcessingTime = (endTime - startTime).toDouble / 1000
+    println(s"Processing took $splitProcessingTime seconds.")
+    val totalTimeSoFar = ((endTime - initialTime).toDouble / 1000) / 3600
+    val totalTimeSoFarRounded = totalTimeSoFar.floor
+    println(s"Total elapsed time: ${totalTimeSoFarRounded.toInt} hours and ${((totalTimeSoFar - totalTimeSoFarRounded) * 60).floor.toInt} minutes.")
+    val estimatedTimePerSplit = totalTimeSoFar / (split + 1).toDouble
+    val remainingSplits = splits - (split + 1)
+    val estimatedRemaining = remainingSplits * estimatedTimePerSplit
+    val estimatedRemainingRounded = estimatedRemaining.floor
+    println(s"Estimated time remaining for the other splits: ${estimatedRemainingRounded.toInt} hours and ${((estimatedRemaining - estimatedRemainingRounded) * 60).floor.toInt} minutes.")
+    println(s"# triples after filtering so far: $triplesCounted")
+  }
+
+  def countSplit(split: Int): Int = {
+    val is = new FileInputStream(s"./$toCountFolderName/$split.filtered-split")
+    val dis = new DataInputStream(is)
+    var triplesCounted = 0
+    try {
+      while (true) {
+        val sId = dis.readInt // subject id
+        val pId = dis.readInt // predicate id
+        val oId = dis.readInt // object id
+        triplesCounted += 1
+      }
+    } catch {
+      case done: EOFException =>
+        dis.close
+        is.close
+      case t: Throwable =>
+        throw t
+    }
+    triplesCounted
+  }
+
+}
\ No newline at end of file
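For reference, a minimal sketch of the binary layout that TripleCounter's read loop assumes: each triple is stored as three consecutive 32-bit ints (subject, predicate, and object ids), and a file is read until EOF. The SplitFormatSketch object, the file name, and the example ids below are illustrative assumptions for this sketch, not part of the patch.

import java.io.{ DataInputStream, DataOutputStream, EOFException, FileInputStream, FileOutputStream }

object SplitFormatSketch extends App {
  val path = "./example.filtered-split" // hypothetical file name
  // Write two triples in the assumed three-ints-per-triple layout.
  val out = new DataOutputStream(new FileOutputStream(path))
  for ((s, p, o) <- List((1, 2, 3), (4, 5, 6))) {
    out.writeInt(s); out.writeInt(p); out.writeInt(o)
  }
  out.close
  // Read them back the way TripleCounter does: consume ints until EOF.
  val in = new DataInputStream(new FileInputStream(path))
  var triples = 0
  try {
    while (true) { in.readInt; in.readInt; in.readInt; triples += 1 }
  } catch {
    case _: EOFException => in.close
  }
  println(s"counted $triples triples") // prints: counted 2 triples
}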