new:usr:SDK-415:Adding a spark command example #328

Open · wants to merge 4 commits into base: unreleased

Changes from 1 commit
@@ -1,5 +1,5 @@
 """
-This is a sample code used for submitting a Spark script (SparkCommand) and getting the result.
+This is a sample script for submitting a Spark Python/R/Scala command and getting the result.
 """

 from qds_sdk.qubole import Qubole
@@ -27,7 +27,7 @@ def get_content(script_file_path):
     return content


-def execute_query(cluster_label, cmd_to_run, language, user_program_arguments=None, arguments=None):
+def execute_script(cluster_label, cmd_to_run, language, user_program_arguments=None, arguments=None):
     """
     Helper method to execute a script
     :param cluster_label:
@@ -45,17 +45,9 @@ def execute_query(cluster_label, cmd_to_run, language, user_program_arguments=None, arguments=None):
         print("language cannot be None or empty")
         return

-    if language in ["command_line"]:
-        # A Shell command needs to be invoked in this fashion
-        cmd = SparkCommand.create(label=cluster_label, cmdline=cmd_to_run, arguments=arguments,
-                                  user_program_arguments=user_program_arguments)
-    elif language == "sql":
-        # A SQL command needs to be invoked in this fashion
-        cmd = SparkCommand.create(label=cluster_label, sql=cmd_to_run, arguments=arguments)
-    else:
-        # A python, R or scala command needs to be invoked in this fashion.
-        cmd = SparkCommand.create(label=cluster_label, program=cmd_to_run, language=language,
-                                  arguments=arguments, user_program_arguments=user_program_arguments)
+    # A Python, R or Scala command needs to be invoked in this fashion.
+    cmd = SparkCommand.create(label=cluster_label, program=cmd_to_run, language=language, arguments=arguments,
+                              user_program_arguments=user_program_arguments)

     while not SparkCommand.is_done(cmd.status):
         print("Waiting for completion of command : {}".format(cmd.id))
@@ -89,27 +81,17 @@ def get_results(command):
 if __name__ == '__main__':
     # Set the API token. If you are using an environment other than api.qubole.com, set api_url to that URL
     # as <env_url>/api
-    Qubole.configure(api_token='<auth_token>')
+    Qubole.configure(api_token='<api_token>')

     filename = "<your script location>"
-    user_program_arguments = None  # arguments for your script
-    arguments = None  # spark configuration for your program for ex : "--conf spark.executor.memory=1024M"
-    cluster_label = "<your cluster label>"  # the cluster on which the command will run
+    script_language = None  # set to "python", "R" or "scala"
+    user_program_arguments = None  # arguments for your script
+    arguments = None  # Spark configuration for your program, e.g. "--conf spark.executor.memory=1024M"
+    cluster_label = "<your cluster label>"  # the cluster on which the command will run

     # Running a Python command. For Scala or R, get the script content and set the language field to "scala"
     # or "R" as required
     script = get_content(filename)
-    command = execute_query(cluster_label, script, "python", user_program_arguments=user_program_arguments,
-                            arguments=arguments)
-    get_results(command)
-
-    # Running a SQL command
-    script = "show tables"
-    command = execute_query(cluster_label, script, "sql", arguments=arguments)
-    get_results(command)
-
-    # Running a shell command
-    script = "/usr/lib/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn " \
-             "--deploy-mode client /usr/lib/spark/spark-examples* 1"
-    command = execute_query(cluster_label, script, "command_line", arguments=arguments)
+    command = execute_script(cluster_label, script, script_language, user_program_arguments=user_program_arguments,
+                             arguments=arguments)
     get_results(command)
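A note on the environment comment in the hunk above: pointing the SDK at a non-default environment would look roughly like the sketch below. It assumes api_url is the keyword the comment refers to, and <env_url> stays a placeholder; nothing here is taken from the PR itself.

# Sketch only: configure against an environment other than api.qubole.com.
Qubole.configure(api_token='<api_token>', api_url='<env_url>/api')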
example/qds_spark_scala_inline_example.py (98 additions, 0 deletions)
@@ -0,0 +1,98 @@
"""
This is a sample code used for submitting a Spark Python/R/Scala command and getting the result.
"""

from qds_sdk.qubole import Qubole
from qds_sdk.commands import SparkCommand
import time


def get_results_filename(command_id):
    """
    A helper method to generate a file name to write the downloaded result
    :param command_id:
    :return:
    """
    return "/tmp/result_{}.tsv".format(command_id)


def execute_script(cluster_label, cmd_to_run, language, user_program_arguments=None, arguments=None):
    """
    Helper method to execute a script
    :param cluster_label:
    :param cmd_to_run:
    :param language:
    :param user_program_arguments:
    :param arguments:
    :return:
    """
    # Validate the command itself, not the unrelated global `script`.
    if cmd_to_run is None or cmd_to_run == "":
        print("command to run cannot be None or empty")
        return None

    if not language:
        print("language cannot be None or empty")
        return None

    # A Python, R or Scala command needs to be invoked in this fashion.
    cmd = SparkCommand.create(label=cluster_label, program=cmd_to_run, language=language, arguments=arguments,
                              user_program_arguments=user_program_arguments)

    while not SparkCommand.is_done(cmd.status):
        print("Waiting for completion of command : {}".format(cmd.id))
        cmd = SparkCommand.find(cmd.id)
        time.sleep(5)

    if SparkCommand.is_success(cmd.status):
        print("\nCommand Executed: Completed successfully")
    else:
        print("\nCommand Executed: Failed!!! The status returned is: {}".format(cmd.status))
        print(cmd.get_log())
    return cmd


def get_results(command):
    """
    A helper method to get the results
    :param command:
    :return:
    """
    if command is None:
        return None

    results_file_name = get_results_filename(command.id)
    with open(results_file_name, 'w') as fp:
        command.get_results(fp, delim="\n")
    print("results are written to {}".format(results_file_name))


if __name__ == '__main__':
    # Set the API token. If you are using an environment other than api.qubole.com, set api_url to that URL
    # as <env_url>/api
    Qubole.configure(api_token='<api_token>')

    script_language = "scala"  # script language: Python, R or Scala
    user_program_arguments = None  # arguments for your script
    arguments = None  # Spark configuration for your program, e.g. "--conf spark.executor.memory=1024M"
    cluster_label = "<your cluster label>"  # the cluster on which the command will run

    # Running an inline Scala command. For Python or R, set script_language accordingly.
    script = """
import org.apache.spark.sql.SparkSession

object TestMergeBucketIdScenarios {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("Spark Example").getOrCreate()
    val sampleData = Seq(("John", 19), ("Smith", 29), ("Adam", 35), ("Henry", 50))

    import spark.implicits._
    val dataFrame = sampleData.toDF("name", "age")
    val output = dataFrame.select("name").where("age < 30").collect
    output.foreach(println)
  }
}"""
    command = execute_script(cluster_label, script, script_language, user_program_arguments=user_program_arguments,
                             arguments=arguments)
    get_results(command)
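The same execute_script helper covers inline Python as well; below is a minimal sketch under that assumption. The PySpark program itself and the single-string form of user_program_arguments are illustrative, not taken from this PR.

# Sketch: the same helper with language="python" (hypothetical inline program).
py_script = """
import sys
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("Inline Python Example").getOrCreate()
# Assumes user_program_arguments reaches the program as a plain argv string.
n = int(sys.argv[1]) if len(sys.argv) > 1 else 10
print(spark.sparkContext.parallelize(range(n)).sum())
"""
command = execute_script(cluster_label, py_script, "python", user_program_arguments="100")
get_results(command)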
example/qds_spark_shell_command_example.py (74 additions, 0 deletions)
@@ -0,0 +1,74 @@
"""
This is a sample code used for submitting a Shell script as a SparkCommand on a spark Cluster and getting the result.
"""

from qds_sdk.qubole import Qubole
from qds_sdk.commands import SparkCommand
import time


def get_results_filename(command_id):
    """
    A helper method to generate a file name to write the downloaded result
    :param command_id:
    :return:
    """
    return "/tmp/result_{}.tsv".format(command_id)


def execute_spark_shell_command(cluster_label, cmd_to_run):
    """
    Helper method to execute a shell command
    :param cluster_label:
    :param cmd_to_run:
    :return:
    """
    if cmd_to_run is None or cmd_to_run == "":
        print("command to run cannot be None or empty")
        return None

    # A shell command needs to be invoked in this fashion
    cmd = SparkCommand.create(label=cluster_label, cmdline=cmd_to_run)

    while not SparkCommand.is_done(cmd.status):
        print("Waiting for completion of command : {}".format(cmd.id))
        cmd = SparkCommand.find(cmd.id)
        time.sleep(5)

    if SparkCommand.is_success(cmd.status):
        print("\nCommand Executed: Completed successfully")
    else:
        print("\nCommand Executed: Failed!!! The status returned is: {}".format(cmd.status))
        print(cmd.get_log())
    return cmd


def get_results(command):
    """
    A helper method to get the results
    :param command:
    :return:
    """
    if command is None:
        return None

    results_file_name = get_results_filename(command.id)
    with open(results_file_name, 'w') as fp:
        command.get_results(fp, delim="\n")
    print("results are written to {}".format(results_file_name))


if __name__ == '__main__':
    # Set the API token. If you are using an environment other than api.qubole.com, set api_url to that URL
    # as <env_url>/api
    Qubole.configure(api_token='<api_token>')

    cluster_label = "<your cluster label>"  # the cluster on which the command will run

    # Running a shell command
    script = "/usr/lib/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn " \
             "--deploy-mode client /usr/lib/spark/spark-examples* 1"
    command = execute_spark_shell_command(cluster_label, script)
    get_results(command)
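Since cmdline is just a shell string, program arguments ride along at the end of it: in the SparkPi invocation above, the trailing 1 is the partition count. A sketch with a larger value (the number itself is arbitrary):

# Sketch: same spark-submit command, raising SparkPi's partition-count argument.
script = "/usr/lib/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn " \
         "--deploy-mode client /usr/lib/spark/spark-examples* 100"
command = execute_spark_shell_command(cluster_label, script)
get_results(command)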
example/qds_spark_sql_example.py (74 additions, 0 deletions)
@@ -0,0 +1,74 @@
"""
This is a sample code used for submitting a SQL query as a SparkCommand and getting the result.
"""

from qds_sdk.qubole import Qubole
from qds_sdk.commands import SparkCommand
import time


def get_results_filename(command_id):
    """
    A helper method to generate a file name to write the downloaded result
    :param command_id:
    :return:
    """
    return "/tmp/result_{}.tsv".format(command_id)


def execute_sql_query(cluster_label, query, arguments=None):
    """
    Helper method to execute a SQL query
    :param cluster_label:
    :param query:
    :param arguments:
    :return:
    """
    if query is None or query == "":
        print("query cannot be None or empty")
        return None

    # A SQL command needs to be invoked in this fashion
    cmd = SparkCommand.create(label=cluster_label, sql=query, arguments=arguments)

    while not SparkCommand.is_done(cmd.status):
        print("Waiting for completion of command : {}".format(cmd.id))
        cmd = SparkCommand.find(cmd.id)
        time.sleep(5)

    if SparkCommand.is_success(cmd.status):
        print("\nCommand Executed: Completed successfully")
    else:
        print("\nCommand Executed: Failed!!! The status returned is: {}".format(cmd.status))
        print(cmd.get_log())
    return cmd


def get_results(command):
    """
    A helper method to get the results
    :param command:
    :return:
    """
    if command is None:
        return None

    results_file_name = get_results_filename(command.id)
    with open(results_file_name, 'w') as fp:
        command.get_results(fp, delim="\n")
    print("results are written to {}".format(results_file_name))


if __name__ == '__main__':
    # Set the API token. If you are using an environment other than api.qubole.com, set api_url to that URL
    # as <env_url>/api
    Qubole.configure(api_token='<api_token>')

    arguments = None  # Spark configuration for your program, e.g. "--conf spark.executor.memory=1024M"
    cluster_label = "<your cluster label>"  # the cluster on which the command will run

    # Running a SQL command
    script = "show tables"
    command = execute_sql_query(cluster_label, script, arguments=arguments)
    get_results(command)
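For completeness, a sketch of actually passing Spark configuration through the arguments parameter, using the conf string the comment above gives as its example; whether several --conf flags can be combined in one string is an assumption.

# Sketch: pass a Spark conf string through `arguments`.
command = execute_sql_query(cluster_label, "show tables",
                            arguments="--conf spark.executor.memory=1024M")
get_results(command)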