Fix issues and disable TPCDSSuite from precheckin (#953)
- Fixed the data and checkout path issue
- Disabling the TPCDSSuite test run from precheckin as it takes around 2 hrs. Planning to add the TPCDS tests to be run as part of a smokePerf bt which will run on a dedicated machine
- Disabling the validation in TPCDSSuite for now as this requires the expected result files to be created using stock Spark beforehand (see the sketch below)
- Modified test description in startUp test
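
The expected result files could be produced by a one-off run of the TPCDS queries against stock Spark before the validation is re-enabled. A minimal sketch reusing the suite's own entry point; the GenerateTPCDSExpectedResults object, the three-query subset, and the assumption that the non-snappy path of TPCDSQuerySnappyBenchmark.execute writes per-query result files are illustrative, not part of this commit:

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark

// Hypothetical one-off driver: run the TPCDS queries through stock Spark so
// the per-query result files exist before the validation is re-enabled.
// Assumes the `false` (non-snappy) path of execute() writes those files.
object GenerateTPCDSExpectedResults {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("tpcds-expected-results")
    TPCDSQuerySnappyBenchmark.spark =
      SparkSession.builder.config(conf).getOrCreate()

    val dataLocation = "/export/shared/QA_DATA/TPCDS/data" // same path the suite uses
    val snappyRepo = s"${System.getenv("SNAPPY_HOME")}/../../.."
    val queries = Seq("q1", "q2", "q3") // extend to the full list from TPCDSSuite

    TPCDSQuerySnappyBenchmark.execute(dataLocation, queries, false,
      s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
  }
}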
swatisawant authored Feb 7, 2018
1 parent 113a14d commit 28b43fe
Showing 2 changed files with 46 additions and 38 deletions.
@@ -16,36 +16,35 @@
*/

package io.snappydata.benchmark.snappy
import java.io.{File, FileOutputStream, PrintStream}

import io.snappydata.{Constant, SnappyFunSuite}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SnappyContext, SnappySession, SparkSession}
import org.apache.spark.sql.execution.benchmark.{TPCDSQueryBenchmark, TPCDSQuerySnappyBenchmark}
import org.scalatest.{BeforeAndAfterAll, FunSuite}
import java.io.{File, FileOutputStream, PrintStream}

import scala.collection.mutable.ArrayBuffer
import io.snappydata.SnappyFunSuite
import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark
import org.apache.spark.sql.{SnappySession, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.BeforeAndAfterAll


class TPCDSSuite extends SnappyFunSuite
  with BeforeAndAfterAll {

var tpcdsQueries = Seq[String]()


val conf =
new SparkConf()
    .setMaster("local[*]")
    .setAppName("test-sql-context")
    .set("spark.driver.allowMultipleContexts", "true")
    .set("spark.sql.shuffle.partitions", "4")
    .set("spark.driver.memory", "1g")
    .set("spark.executor.memory", "1g")
    .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)

override def beforeAll(): Unit = {
super.beforeAll()
    tpcdsQueries = Seq(
"q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
"q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20",
"q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30",
@@ -58,28 +57,40 @@ class TPCDSSuite extends SnappyFunSuite
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
}

test("Test with Spark") {
TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate()
val dataLocation = "Directory location for TPCDS data"
val snappyRepo = "Directory path of snappy repo"

TPCDSQuerySnappyBenchmark.execute(dataLocation,
queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
// Disabling the test run from precheckin as it takes around an hour.
// TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
// machine.


}

test("Test with Snappy") {
ignore("Test with Snappy") {
val sc = new SparkContext(conf)
TPCDSQuerySnappyBenchmark.snappy = new SnappySession(sc)
val dataLocation = "Directory location for TPCDS data"
val snappyRepo = "Directory path of snappy repo"
val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
val snappyHome = System.getenv("SNAPPY_HOME")
val snappyRepo = s"$snappyHome/../../.."

TPCDSQuerySnappyBenchmark.execute(dataLocation,
queries = tpcdsQueries, true, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
}

test("Validate Results") {
// Disabling the test run from precheckin as it takes around an hour.
// TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
// machine.

ignore("Test with Spark") {
TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate()
val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
val snappyHome = System.getenv("SNAPPY_HOME")
val snappyRepo = s"$snappyHome/../../.."

TPCDSQuerySnappyBenchmark.execute(dataLocation,
queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")

}

// Disabling the validation for now as this requires the expected result files to be created
// using stock Spark beforehand.

ignore("Validate Results") {

for (query <- tpcdsQueries) {

@@ -98,8 +109,8 @@
if (!actualLineSet.equals(expectedLineSet)) {
if (!(expectedLineSet.size == actualLineSet.size)) {
resultOutputStream.println(s"For $query " +
  s"a result count mismatch was observed with " +
  s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
} else {
for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
if (!expectedLine.equals(actualLine)) {
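For reference, the comparison the now-ignored "Validate Results" test performs reads roughly as the following standalone sketch; the per-query .out file layout under expected/actual directories and the CompareQueryResults name are illustrative, not taken from this commit:

import java.io.{File, PrintStream}
import scala.io.Source

// Illustrative standalone version of the disabled validation: compare the
// result file of one query against its pre-generated expected file.
object CompareQueryResults {
  def compare(query: String, expectedDir: String, actualDir: String,
      out: PrintStream): Unit = {
    val expectedLineSet = Source.fromFile(new File(expectedDir, s"$query.out")).getLines().toList
    val actualLineSet = Source.fromFile(new File(actualDir, s"$query.out")).getLines().toList

    if (!actualLineSet.equals(expectedLineSet)) {
      if (expectedLineSet.size != actualLineSet.size) {
        // Different row counts: report the sizes only.
        out.println(s"For $query a result count mismatch was observed with " +
          s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
      } else {
        // Same row count: report differing lines pairwise, in order.
        for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
          if (!expectedLine.equals(actualLine)) {
            out.println(s"For $query expected: $expectedLine but got: $actualLine")
          }
        }
      }
    }
  }
}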
@@ -3,14 +3,11 @@ up config with abruptly killing server members during the query execution and th
cluster at the end.";
hydra.Prms-testDescription = "
This test starts the snappy cluster and spark cluster.
Test then runs the spark App for creating and loading data in persistent column tables using
northwind schema and data.
It then executes the snappy job and sql script in parallel along with abruptly killing servers
one by one during the ops are in progress and then randomizing the order of server startup config
after stopping and before restarting the cluster.
Test then runs the sql script for creating and loading data in persistent column tables using northwind schema and data.
It then executes the snappy job, spark app and sql script in parallel, abruptly killing servers one by one while the ops are in progress and then randomizing the order of the server startup config after stopping and before restarting the cluster.
Snappy job executes and validates the northwind queries on the tables created and loaded through split mode.
The sql script only executes the northwind queries on the tables created and loaded through split mode.
At the last, test verifies that the cluster is restarted successfully";
At the end, the test verifies that the cluster is restarted successfully every time.";

INCLUDE $JTESTS/io/snappydata/hydra/startUp/serverMeanKill.inc;
INCLUDE $JTESTS/io/snappydata/hydra/startUp/clusterRestart.inc;
