From 8719e44d5eac08ecfb7f742e4b17160a255a5866 Mon Sep 17 00:00:00 2001
From: Adam Sitnik
Date: Wed, 24 Apr 2019 21:33:44 +0200
Subject: [PATCH] TPCH Benchmark fixes (#44)

---
 benchmark/csharp/Tpch/Program.cs                              | 2 ++
 benchmark/scala/src/main/scala/com/microsoft/tpch/App.scala   | 4 +++-
 .../main/scala/com/microsoft/tpch/TpchFunctionalQueries.scala | 4 ++--
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/benchmark/csharp/Tpch/Program.cs b/benchmark/csharp/Tpch/Program.cs
index 14e1a8019..660e1ed8a 100644
--- a/benchmark/csharp/Tpch/Program.cs
+++ b/benchmark/csharp/Tpch/Program.cs
@@ -18,6 +18,8 @@ private static void Main(string[] args)
                 Console.WriteLine("\t --master local");
                 Console.WriteLine("\t\t--class org.apache.spark.deploy.DotnetRunner ");
                 Console.WriteLine("\t\tTpch.exe ");
+
+                return;
             }
 
             var tpchRoot = args[0];
diff --git a/benchmark/scala/src/main/scala/com/microsoft/tpch/App.scala b/benchmark/scala/src/main/scala/com/microsoft/tpch/App.scala
index baaaf1c94..908ee4c3f 100644
--- a/benchmark/scala/src/main/scala/com/microsoft/tpch/App.scala
+++ b/benchmark/scala/src/main/scala/com/microsoft/tpch/App.scala
@@ -14,8 +14,10 @@ object App {
   def main(args: Array[String]) {
     if (args.length != 4) {
       println("Usage:")
-      println("\t --master local --class com.microsoft.tpch.App microsoft-spark-examples-.jar")
+      println("\t --master local --class com.microsoft.tpch.App microsoft-spark-benchmark-.jar")
       println("\t\t ")
+
+      return
     }
 
     val tpchRoot = args(0)
diff --git a/benchmark/scala/src/main/scala/com/microsoft/tpch/TpchFunctionalQueries.scala b/benchmark/scala/src/main/scala/com/microsoft/tpch/TpchFunctionalQueries.scala
index d1f7c27ce..89c08ed29 100644
--- a/benchmark/scala/src/main/scala/com/microsoft/tpch/TpchFunctionalQueries.scala
+++ b/benchmark/scala/src/main/scala/com/microsoft/tpch/TpchFunctionalQueries.scala
@@ -250,10 +250,10 @@ class TpchFunctionalQueries(spark: SparkSession, tpchRoot: String)
 
     customer.join(order, $"c_custkey" ===
       order("o_custkey") && !special(order("o_comment")), "left_outer")
-      .groupBy($"o_custkey")
+      .groupBy($"c_custkey")
       .agg(count($"o_orderkey").as("c_count"))
       .groupBy($"c_count")
-      .agg(count($"o_custkey").as("custdist"))
+      .agg(count($"*").as("custdist"))
       .sort($"custdist".desc, $"c_count".desc)
       .show()
   }