Refactored integration tests (#53)
* Extended integration tests

* Split integration tests across stages

* Fixed integration test build variable name

* Update README.md

* Update README.md

* Delete test_spec.json
algattik authored Sep 30, 2019
1 parent 78a03e3 commit d8df7d7
Showing 23 changed files with 288 additions and 110 deletions.
2 changes: 1 addition & 1 deletion components/azure-databricks/peer-databricks-vnet.sh
@@ -3,7 +3,7 @@
 # Strict mode, fail on any error
 set -euo pipefail

-databricksResourceGroup=${DATABRICKS_RESOURCE_GROUP:-$RESOURCE_GROUP}
+databricksResourceGroup=${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}

 echo "Getting VNET ids"
 databricks_vnet_name="databricks-vnet"
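The `${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}` expansion falls back to `$RESOURCE_GROUP` whenever the renamed pipeline variable is unset or empty, so deployments that only define `RESOURCE_GROUP` keep working. A quick illustration of the idiom:

```bash
RESOURCE_GROUP=streamingrg
unset DATABRICKS_VNET_RESOURCE_GROUP
echo "${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}"  # prints: streamingrg

DATABRICKS_VNET_RESOURCE_GROUP=databricksrg
echo "${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}"  # prints: databricksrg
```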
31 changes: 23 additions & 8 deletions components/azure-dataexplorer/create-dataexplorer.sh
@@ -12,13 +12,28 @@ fi
 # Run as early as possible in script, as principal takes time to become available for RBAC operation below.
 echo "checking service principal exists"
 if ! az keyvault secret show --vault-name $DATAEXPLORER_KEYVAULT --name $DATAEXPLORER_CLIENT_NAME-password -o none 2>/dev/null ; then
-  echo "creating service principal"
-  password=$(az ad sp create-for-rbac \
-    --skip-assignment \
-    --name http://$DATAEXPLORER_CLIENT_NAME \
-    --query password \
-    --output tsv)
+  # When running in Azure DevOps pipeline (AzureCLI task with "addSpnToEnvironment: true"), use the provided service principal
+  if [ -n "${servicePrincipalId:-}" ]; then
+    appId="$servicePrincipalId"
+    password="$servicePrincipalKey"
+  # Otherwise create a new service principal
+  else
+    echo "creating service principal"
+    password=$(az ad sp create-for-rbac \
+      --skip-assignment \
+      --name http://$DATAEXPLORER_CLIENT_NAME \
+      --query password \
+      --output tsv)
+    echo "getting service principal"
+    appId=$(az ad sp show --id http://$DATAEXPLORER_CLIENT_NAME --query appId --output tsv)
+  fi

+  echo "storing service principal in Key Vault"
+  az keyvault secret set \
+    --vault-name $DATAEXPLORER_KEYVAULT \
+    --name $DATAEXPLORER_CLIENT_NAME-id \
+    --value "$appId" \
+    -o tsv >>log.txt
   az keyvault secret set \
     --vault-name $DATAEXPLORER_KEYVAULT \
     --name $DATAEXPLORER_CLIENT_NAME-password \
@@ -64,8 +79,8 @@ if ! kustoQuery "/v1/rest/mgmt" ".show table EventTable ingestion json mapping \"EventMapping\"" ; then
   kustoQuery "/v1/rest/mgmt" ".create table EventTable ingestion json mapping 'EventMapping' '[ { \\\"column\\\": \\\"eventId\\\", \\\"path\\\": \\\"$.eventId\\\" }, { \\\"column\\\": \\\"complexData\\\", \\\"path\\\": \\\"$.complexData\\\" }, { \\\"column\\\": \\\"value\\\", \\\"path\\\": \\\"$.value\\\" }, { \\\"column\\\": \\\"type\\\", \\\"path\\\": \\\"$.type\\\" }, { \\\"column\\\": \\\"deviceId\\\", \\\"path\\\": \\\"$.deviceId\\\" }, { \\\"column\\\": \\\"createdAt\\\", \\\"path\\\": \\\"$.createdAt\\\" } ]'"
 fi

-echo "getting service principal"
-appId=$(az ad sp show --id http://$DATAEXPLORER_CLIENT_NAME --query appId --output tsv)
+echo "getting Service Principal ID"
+appId=$(az keyvault secret show --vault-name $DATAEXPLORER_KEYVAULT -n $DATAEXPLORER_CLIENT_NAME-id --query value -o tsv)

 echo "granting service principal Data Explorer database Viewer permissions"
 MAXRETRY=60
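The grant below is retried because a freshly created service principal can take minutes to become visible for RBAC (hence the early creation at the top of the script and `MAXRETRY=60`). The loop body is collapsed in this view; here is a hypothetical sketch of such a retry, reusing the script's `kustoQuery` helper. The `.add database ... viewers` control command and the `DATAEXPLORER_DATABASE` variable are assumptions for illustration, not the file's elided code:

```bash
# Hypothetical sketch: retry the Viewer grant until the principal has propagated.
for i in $(seq 1 $MAXRETRY); do
  if kustoQuery "/v1/rest/mgmt" \
      ".add database $DATAEXPLORER_DATABASE viewers ('aadapp=$appId')"; then
    break
  fi
  echo "principal not yet available, retrying ($i/$MAXRETRY)"
  sleep 10
done
```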
2 changes: 1 addition & 1 deletion components/azure-hdinsight/create-hdinsight-kafka.sh
@@ -20,7 +20,7 @@ az hdinsight create -t kafka -g $RESOURCE_GROUP -n $HDINSIGHT_NAME \
   --version 3.6 --component-version Kafka=1.1 \
   --zookeepernode-size Standard_D2_V2 \
   --headnode-size Standard_E2_V3 \
-  --workernode-size $HDINSIGHT_WORKER_SIZE --size $HDINSIGHT_WORKERS \
+  --workernode-size $HDINSIGHT_WORKER_SIZE --workernode-count $HDINSIGHT_WORKERS \
   --workernode-data-disks-per-node 2 \
   --vnet-name $VNET_NAME --subnet ingestion-subnet \
   --cluster-configurations "$config" \
13 changes: 13 additions & 0 deletions eventhubs-databricks-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "eda1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
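Each `test_spec.json` entry describes one integration test: `stage` assigns it to one of the pipeline's pytest stages, `short` is a compact solution identifier (suitable for resource group names), `steps` selects the create-solution.sh steps to run, `minutes` is the test duration, `throughput` the generator load, and `extra_args` carries solution-specific flags such as `-k rowstore`. A rough sketch of how a runner could expand an entry into a `create-solution.sh` call (only the `-d` flag is confirmed by the pipeline below; the `-s`/`-t` mapping and the resource-group naming are assumptions for illustration):

```bash
#!/bin/bash
set -euo pipefail
# Hypothetical runner: expand the first spec entry into a deployment command.
solution=eventhubs-databricks-azuresql
entry=$(jq -c '.[0]' "$solution/test_spec.json")
rg="$RESOURCE_GROUP_PREFIX$(jq -r '.short' <<<"$entry")"  # e.g. xyzzy0x4eda1
steps=$(jq -r '.steps' <<<"$entry")                       # e.g. CIDPTMV
tput=$(jq -r '.throughput' <<<"$entry")                   # e.g. 1
extra=$(jq -r '.extra_args | join(" ")' <<<"$entry")      # e.g. -k rowstore
(cd "$solution" && ./create-solution.sh -d "$rg" -s "$steps" -t "$tput" $extra)
```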
10 changes: 10 additions & 0 deletions eventhubs-databricks-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "edc1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
10 changes: 10 additions & 0 deletions eventhubs-databricks-delta/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "1",
+    "short": "edd1",
+    "steps": "CIPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
2 changes: 1 addition & 1 deletion eventhubs-dataexplorer/create-solution.sh
@@ -14,7 +14,7 @@ usage() {
 echo " Possible values:"
 echo " C=COMMON"
 echo " I=INGESTION"
-echo " P=PROCESSING"
+echo " D=DATABASE"
 echo " T=TEST clients"
 echo " M=METRICS reporting"
 echo " V=VERIFY deployment"
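These step letters are what the `steps` strings in the test_spec.json files select. For this solution a database step replaces the processing step, since Data Explorer ingests events directly. A hypothetical full run, assuming `-s` is the flag that selects steps:

```bash
# Hypothetical invocation: deploy, ingest, database, test, metrics, verify.
./create-solution.sh -d myrg1 -s CIDTMV
```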
13 changes: 13 additions & 0 deletions eventhubs-functions-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "efa1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
13 changes: 13 additions & 0 deletions eventhubs-functions-cosmosdb/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "efc1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-f",
+      "Test0"
+    ]
+  }
+]
13 changes: 13 additions & 0 deletions eventhubs-streamanalytics-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "esa1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
10 changes: 10 additions & 0 deletions eventhubs-streamanalytics-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "esc1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
10 changes: 10 additions & 0 deletions eventhubskafka-databricks-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "kdc1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
13 changes: 13 additions & 0 deletions hdinsightkafka-databricks-sqldw/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "3",
+    "short": "hdw1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "columnstore"
+    ]
+  }
+]
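The `stage` field is what splits the specs across the pipeline's three pytest invocations (see azure-pipelines.yml below). A small helper sketch to list which solutions run in a given stage:

```bash
# List solution folders whose test_spec.json declares a test in stage 2.
for spec in */test_spec.json; do
  if jq -e --arg s 2 '.[] | select(.stage == $s)' "$spec" >/dev/null; then
    dirname "$spec"
  fi
done
```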
54 changes: 50 additions & 4 deletions integration-tests/README.md
@@ -17,11 +17,54 @@ Since the provisioning of an Azure Databricks workspace cannot be fully
 automated at present, you must generate a PAT token of a preexisting workspace
 and supply it to the pipeline.

+## Installing the build agent
+
+As the integration tests can run for more than 6 hours, they must be run on
+self-hosted Azure DevOps agents.
+
+Create a project in Azure DevOps. Create an agent pool named "streaming-at-scale".
+
+In the Azure portal, create an Azure VM with Ubuntu 18.04 LTS.
+
+SSH to the VM and run the following commands interactively, one at a time.
+
+```bash
+# Install the Azure DevOps agent. When prompted, enter the Azure DevOps host and a PAT token with Agent Pool management permissions.
+mkdir agent
+cd agent
+wget https://vstsagentpackage.azureedge.net/agent/2.155.1/vsts-agent-linux-x64-2.155.1.tar.gz
+tar zxvf vsts-agent-linux-x64-2.155.1.tar.gz
+./config.sh
+sudo ./svc.sh install
+sudo ./svc.sh start
+# Install jq
+sudo apt update
+sudo apt install jq
+# Install zip
+sudo apt install zip
+# Install az (Azure CLI)
+curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+# Install dotnet SDK
+wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
+sudo dpkg -i packages-microsoft-prod.deb
+sudo add-apt-repository universe
+sudo apt-get install apt-transport-https
+sudo apt-get update
+sudo apt-get install dotnet-sdk-2.2
+```
+
 ## Creating the integration test pipeline in Azure DevOps

-* Create a Databricks workspace in the Azure region of your choice.
-* Create a project in Azure Pipelines.
-* In your Azure Pipelines project settings, navigate to service connection and
+* Create a Databricks workspace in the Azure region of your choice:
+  * tier: standard
+  * make sure the workspace is deployed with a custom VNET (as the HDInsight
+    Kafka setup will need to peer VNETs). The custom VNET must be named
+    'databricks-vnet'.
+  * You can use the [Databricks VNET
+    template](https://azure.microsoft.com/en-us/resources/templates/101-databricks-all-in-one-template-for-vnet-injection/),
+    changing the tier to standard on the deployment screen.
+* Install a build agent (instructions above).
+* In your Azure DevOps project settings, navigate to service connection and
   create an ARM service connection to your Azure subscription named
   'ARMConnection'. Do not restrict the connection to a particular resource
   group.
@@ -33,8 +76,8 @@ and supply it to the pipeline.
 | Variable name | Description | Required? | Example |
 | -------------------- | ---------------------------------------------- | --------- | ---------- |
 | LOCATION | Azure region in which to deploy infrastructure | required | eastus |
-| DATABRICKS_PAT_TOKEN | (secret variable) Databricks PAT token for a Databricks workspace deployed in $LOCATION | required | dapi01234567890123456789012345678901 |
+| DATABRICKS_PAT_TOKEN | (secret variable) Databricks PAT token for a Databricks workspace deployed in $LOCATION | required | dapi012345... |
+| DATABRICKS_VNET_RESOURCE_GROUP | Resource Group containing the Databricks VNET | required | streamingitests |
 | RESOURCE_GROUP_PREFIX | Prefix used to name deployed resources. Must be globally unique; use a sufficiently unique string | required | xyzzy0x4 |
+| AGENT_VM_RESOURCE_GROUP | Resource group of the build agent VM | required | streamingitests |
+| AGENT_VM_NAME | Name of the build agent VM | required | streamingbuildagent |


 ## Running the integration tests
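The variables in the table above can also be set from the command line with the azure-devops CLI extension; a sketch, where the organization, project, and pipeline ID are placeholders:

```bash
az extension add --name azure-devops
az pipelines variable create --org https://dev.azure.com/myorg --project myproject \
  --pipeline-id 1 --name LOCATION --value eastus
az pipelines variable create --org https://dev.azure.com/myorg --project myproject \
  --pipeline-id 1 --name DATABRICKS_PAT_TOKEN --secret true --value "$DATABRICKS_PAT"
```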
61 changes: 32 additions & 29 deletions integration-tests/azure-pipelines.yml
@@ -1,53 +1,56 @@
 trigger:
 - master

 jobs:

-- job: generator
+- job: start_agent
   steps:
-  - bash: echo "##vso[task.setVariable variable=TaskList;isOutput=true]$(bash integration-tests/generate-task-list.sh)"
-    name: GenerateTasks
-    displayName: Generating list of integration test tasks
+  - task: AzureCLI@1
+    inputs:
+      azureSubscription: ARMConnection
+      scriptLocation: 'inlineScript'
+      inlineScript: az vm start -g "$AGENT_VM_RESOURCE_GROUP" -n "$AGENT_VM_NAME"
+    displayName: 'start agent'

-- job: runner
-  dependsOn: generator
-  timeoutInMinutes: 180
+- job: run_tests
+  dependsOn: start_agent
+  timeoutInMinutes: 0
   pool:
-    vmImage: 'ubuntu-16.04'
-  strategy:
-    maxParallel: 1
-    matrix: $[ dependencies.generator.outputs['GenerateTasks.TaskList'] ]
+    name: streaming-at-scale
   variables:
     DATABRICKS_HOST: https://$(LOCATION).azuredatabricks.net
   steps:

-  - bash: >
-      set -e;
-      sudo apt install python3-setuptools;
-      sudo pip3 install wheel databricks-cli;
-      databricks clusters spark-versions;
-      echo "##vso[task.setVariable variable=DATABRICKS_TOKEN]$DATABRICKS_TOKEN";
+  - bash: |
+      set -e
+      sudo apt install -y python3-pip python3-setuptools
+      sudo pip3 install wheel databricks-cli
+      databricks clusters spark-versions
+      echo "##vso[task.setVariable variable=DATABRICKS_TOKEN]$DATABRICKS_TOKEN"
     displayName: Install Databricks CLI and expose token to next tasks
    env:
      DATABRICKS_TOKEN: $(DATABRICKS_PAT_TOKEN)

+  - script: |
+      pip3 install pytest pytest-azurepipelines flaky
+    displayName: 'Install test dependencies'

   - task: AzureCLI@1
-    displayName: Check RG name available
     inputs:
       azureSubscription: ARMConnection
-      scriptPath: integration-tests/check-resource-group.sh
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 1
+    displayName: 'pytest stage 1'

   - task: AzureCLI@1
-    displayName: Run test
     inputs:
       azureSubscription: ARMConnection
-      workingDirectory: $(TestDir)
-      scriptPath: $(TestDir)/create-solution.sh
-      arguments: -d $(RG_NAME) $(TestArgs)
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 2
+    displayName: 'pytest stage 2'

   - task: AzureCLI@1
-    displayName: Delete RG
     condition: always() # this step will always run, even if the pipeline is cancelled
     inputs:
       azureSubscription: ARMConnection
-      scriptPath: integration-tests/delete-resource-group.sh
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 3
+      # Provide service principal (for Azure Data Explorer RBAC setup)
+      addSpnToEnvironment: true
+    displayName: 'pytest stage 3'
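With `addSpnToEnvironment: true`, the AzureCLI task injects the service connection's credentials into the script's environment as `servicePrincipalId`, `servicePrincipalKey`, and `tenantId`; that is what lets create-dataexplorer.sh reuse the pipeline's principal instead of creating one. A minimal sketch of a script consuming them (the nested login is illustrative, not part of this repository):

```bash
# Runs inside an AzureCLI task that sets addSpnToEnvironment: true.
az login --service-principal \
  -u "$servicePrincipalId" \
  -p "$servicePrincipalKey" \
  --tenant "$tenantId"
```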
8 changes: 3 additions & 5 deletions integration-tests/check-resource-group.sh
@@ -2,15 +2,13 @@

 set -euo pipefail

-RG_NAME="$RESOURCE_GROUP_PREFIX$SYSTEM_JOBNAME"
+RG_NAME="$1"

-if R=$(az group show -n $RG_NAME --query 'tags.streaming_at_scale_generated' -o tsv); then
-  if [ -z "$R" ]; then
+if group_info=$(az group show -n $RG_NAME --query 'tags.streaming_at_scale_generated' -o tsv); then
+  if [ -z "$group_info" ]; then
     echo "ERROR: Resource group $RG_NAME exists, and does not have tag streaming_at_scale_generated"
     exit 1
   fi
-  echo "Deleting existing resource group $RG_NAME (as it has tag streaming_at_scale_generated)"
-  az group delete -y -g $RG_NAME
 fi

 echo "##vso[task.setVariable variable=RG_NAME;]$RG_NAME"
2 changes: 2 additions & 0 deletions integration-tests/conftest.py
@@ -0,0 +1,2 @@
+def pytest_addoption(parser):
+    parser.addoption("--stage", required=True)
9 changes: 7 additions & 2 deletions integration-tests/delete-resource-group.sh
@@ -2,7 +2,12 @@

 set -euo pipefail

+RG_NAME="$1"
+
 if [ -n "${RG_NAME:-}" ]; then
-  echo "Deleting RG $RG_NAME"
-  az group delete -y -g "$RG_NAME" --no-wait || true
+  echo "Checking if RG $RG_NAME exists"
+  if az group show -g "$RG_NAME" -o none 2>/dev/null; then
+    echo "Deleting RG $RG_NAME"
+    az group delete -y -g "$RG_NAME" --no-wait
+  fi
 fi