From 28b43fe8c00ac0ceee3d3519f11e2ed52196ce62 Mon Sep 17 00:00:00 2001
From: Swati Sawant
Date: Wed, 7 Feb 2018 11:54:03 +0530
Subject: [PATCH] Fix issues and disable TPCDSSuite from precheckin (#953)

- Fixed the data and checkout path issues
- Disabled the TPCDSSuite test run from precheckin, as it takes around 2 hrs.
  The TPCDS tests are planned to run as a part of the smokePerf bt, which
  will run on a dedicated machine
- Disabled the validation in TPCDSSuite for now, as it requires the expected
  result files to be created using stock Spark beforehand
- Modified the test description in the startUp test
---
 .../benchmark/snappy/TPCDSSuite.scala         | 75 +++++++++++--------
 ...estartWithRandomOrderForServerStartUp.conf |  9 +--
 2 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala
index cf0e06078a..43eb2f6872 100644
--- a/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala
+++ b/cluster/src/test/scala/io/snappydata/benchmark/snappy/TPCDSSuite.scala
@@ -16,36 +16,35 @@
  */
 package io.snappydata.benchmark.snappy
 
- import java.io.{File, FileOutputStream, PrintStream}
- import io.snappydata.{Constant, SnappyFunSuite}
- import org.apache.spark.{SparkConf, SparkContext}
- import org.apache.spark.sql.{Row, SnappyContext, SnappySession, SparkSession}
- import org.apache.spark.sql.execution.benchmark.{TPCDSQueryBenchmark, TPCDSQuerySnappyBenchmark}
- import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import java.io.{File, FileOutputStream, PrintStream}
 
- import scala.collection.mutable.ArrayBuffer
+import io.snappydata.SnappyFunSuite
+import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark
+import org.apache.spark.sql.{SnappySession, SparkSession}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.BeforeAndAfterAll
 
 class TPCDSSuite extends SnappyFunSuite
-  with BeforeAndAfterAll {
+    with BeforeAndAfterAll {
 
   var tpcdsQueries = Seq[String]()
 
   val conf = new SparkConf()
-      .setMaster("local[*]")
-      .setAppName("test-sql-context")
-      .set("spark.driver.allowMultipleContexts", "true")
-      .set("spark.sql.shuffle.partitions", "4")
-      .set("spark.driver.memory", "1g")
-      .set("spark.executor.memory", "1g")
-      .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)
+    .setMaster("local[*]")
+    .setAppName("test-sql-context")
+    .set("spark.driver.allowMultipleContexts", "true")
+    .set("spark.sql.shuffle.partitions", "4")
+    .set("spark.driver.memory", "1g")
+    .set("spark.executor.memory", "1g")
+    .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)
 
   override def beforeAll(): Unit = {
     super.beforeAll()
-   tpcdsQueries = Seq(
+    tpcdsQueries = Seq(
       "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
       "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20",
       "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30",
@@ -58,28 +57,40 @@ class TPCDSSuite extends SnappyFunSuite
       "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
   }
 
-  test("Test with Spark") {
-    TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate()
-    val dataLocation = "Directory location for TPCDS data"
-    val snappyRepo = "Directory path of snappy repo"
-
-    TPCDSQuerySnappyBenchmark.execute(dataLocation,
-      queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
+  // Disabling the test run from precheckin as it takes around an hour.
+  // TODO: Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
+  // machine.
-
-  }
-
-  test("Test with Snappy") {
+  ignore("Test with Snappy") {
     val sc = new SparkContext(conf)
     TPCDSQuerySnappyBenchmark.snappy = new SnappySession(sc)
-    val dataLocation = "Directory location for TPCDS data"
-    val snappyRepo = "Directory path of snappy repo"
+    val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
+    val snappyHome = System.getenv("SNAPPY_HOME")
+    val snappyRepo = s"$snappyHome/../../.."
 
     TPCDSQuerySnappyBenchmark.execute(dataLocation,
       queries = tpcdsQueries, true, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
   }
 
-  test("Validate Results") {
+  // Disabling the test run from precheckin as it takes around an hour.
+  // TODO: Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
+  // machine.
+
+  ignore("Test with Spark") {
+    TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate()
+    val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
+    val snappyHome = System.getenv("SNAPPY_HOME")
+    val snappyRepo = s"$snappyHome/../../.."
+
+    TPCDSQuerySnappyBenchmark.execute(dataLocation,
+      queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
+  }
+
+  // Disabling the validation for now as this requires the expected result files to be created
+  // using stock spark beforehand.
+
+  ignore("Validate Results") {
     for (query <- tpcdsQueries) {
@@ -98,8 +109,8 @@ class TPCDSSuite extends SnappyFunSuite
       if (!actualLineSet.equals(expectedLineSet)) {
         if (!(expectedLineSet.size == actualLineSet.size)) {
           resultOutputStream.println(s"For $query " +
-            s"result count mismatched observed with " +
-            s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
+              s"result count mismatch observed with " +
+              s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
         } else {
           for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
             if (!expectedLine.equals(actualLine)) {
diff --git a/dtests/src/test/java/io/snappydata/hydra/startUp/nwTestWithAllServerHA_meanKill_clusterRestartWithRandomOrderForServerStartUp.conf b/dtests/src/test/java/io/snappydata/hydra/startUp/nwTestWithAllServerHA_meanKill_clusterRestartWithRandomOrderForServerStartUp.conf
index 230271ba2e..eff49049a7 100644
--- a/dtests/src/test/java/io/snappydata/hydra/startUp/nwTestWithAllServerHA_meanKill_clusterRestartWithRandomOrderForServerStartUp.conf
+++ b/dtests/src/test/java/io/snappydata/hydra/startUp/nwTestWithAllServerHA_meanKill_clusterRestartWithRandomOrderForServerStartUp.conf
@@ -3,14 +3,11 @@ up config with abruptly killing server members during the query execution and th
 cluster at the end.";
 
 hydra.Prms-testDescription = " This test starts the snappy cluster and spark cluster.
-Test then runs the spark App for creating and loading data in persistent column tables using
-northwind schema and data.
-It then executes the snappy job and sql script in parallel along with abruptly killing servers
-one by one during the ops are in progress and then randomizing the order of server startup config
- after stopping and before restarting the cluster.
+Test then runs the sql script for creating and loading data in persistent column tables using the northwind schema and data.
+It then executes the snappy job, spark app and sql script in parallel, abruptly killing servers one by one while the ops are in progress, and then randomizes the order of the server startup config after stopping and before restarting the cluster.
 Snappy job executes and validate the northwind queries on the tables created and loaded through split mode.
 sql script only executes the northwind queries on the tables created and loaded through split mode.
-At the last, test verifies that the cluster is restarted successfully";
+Finally, the test verifies that the cluster is restarted successfully every time.";
 
 INCLUDE $JTESTS/io/snappydata/hydra/startUp/serverMeanKill.inc;
 INCLUDE $JTESTS/io/snappydata/hydra/startUp/clusterRestart.inc;
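
Note (not part of the patch): the TODO above leaves both benchmark runs
permanently ignored until the smokePerf bt exists. One possible follow-up is
to gate the runs behind a system property so the same suite can be reused on
the dedicated machine. A minimal sketch, assuming SnappyFunSuite exposes
ScalaTest's test/ignore registration as the suite above does; the class name
TPCDSGatedSuite, the helper perfTest, the query subset and the property
tpcds.enable are illustrative assumptions, not existing code:

package io.snappydata.benchmark.snappy

import io.snappydata.SnappyFunSuite
import org.apache.spark.sql.SnappySession
import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark
import org.apache.spark.{SparkConf, SparkContext}

class TPCDSGatedSuite extends SnappyFunSuite {

  // Hypothetical switch: run the heavy TPCDS benchmark only when explicitly
  // requested, e.g. with -Dtpcds.enable=true on the dedicated perf machine.
  private val tpcdsEnabled = java.lang.Boolean.getBoolean("tpcds.enable")

  // ScalaTest registers tests while the class body is constructed, so the
  // choice between test() and ignore() can be made here at runtime.
  private def perfTest(name: String)(body: => Unit): Unit =
    if (tpcdsEnabled) test(name)(body) else ignore(name)(body)

  perfTest("Test with Snappy") {
    val conf = new SparkConf().setMaster("local[*]").setAppName("tpcds-gated")
    val sc = new SparkContext(conf)
    TPCDSQuerySnappyBenchmark.snappy = new SnappySession(sc)
    // Same path resolution as the patched test.
    val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
    val snappyRepo = s"${System.getenv("SNAPPY_HOME")}/../../.."
    TPCDSQuerySnappyBenchmark.execute(dataLocation,
      queries = Seq("q1", "q2", "q3"), true,
      s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
  }
}

With this shape, precheckin still reports the runs as ignored exactly as in
the patch, while the smokePerf bt would enable them by passing
-Dtpcds.enable=true.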