Refactored integration tests (#53)
* Extended integration tests

* Split integration tests across stages

* Fixed integration test build variable name

* Update README.md

* Update README.md

* Delete test_spec.json
algattik authored Sep 30, 2019
1 parent 78a03e3 commit d8df7d7
Showing 23 changed files with 288 additions and 110 deletions.
2 changes: 1 addition & 1 deletion components/azure-databricks/peer-databricks-vnet.sh
@@ -3,7 +3,7 @@
 # Strict mode, fail on any error
 set -euo pipefail

-databricksResourceGroup=${DATABRICKS_RESOURCE_GROUP:-$RESOURCE_GROUP}
+databricksResourceGroup=${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}

 echo "Getting VNET ids"
 databricks_vnet_name="databricks-vnet"
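The `${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}` expansion falls back to `$RESOURCE_GROUP` whenever the renamed pipeline variable is unset or empty, so deployments that only define `RESOURCE_GROUP` keep working. A quick illustration of the idiom:

```bash
RESOURCE_GROUP=streamingrg
unset DATABRICKS_VNET_RESOURCE_GROUP
echo "${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}"  # prints: streamingrg

DATABRICKS_VNET_RESOURCE_GROUP=databricksrg
echo "${DATABRICKS_VNET_RESOURCE_GROUP:-$RESOURCE_GROUP}"  # prints: databricksrg
```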
31 changes: 23 additions & 8 deletions components/azure-dataexplorer/create-dataexplorer.sh
@@ -12,13 +12,28 @@ fi
 # Run as early as possible in script, as principal takes time to become available for RBAC operation below.
 echo "checking service principal exists"
 if ! az keyvault secret show --vault-name $DATAEXPLORER_KEYVAULT --name $DATAEXPLORER_CLIENT_NAME-password -o none 2>/dev/null ; then
-  echo "creating service principal"
-  password=$(az ad sp create-for-rbac \
-    --skip-assignment \
-    --name http://$DATAEXPLORER_CLIENT_NAME \
-    --query password \
-    --output tsv)
+  # When running in Azure DevOps pipeline (AzureCLI task with "addSpnToEnvironment: true"), use the provided service principal
+  if [ -n "${servicePrincipalId:-}" ]; then
+    appId="$servicePrincipalId"
+    password="$servicePrincipalKey"
+  # Otherwise create a new service principal
+  else
+    echo "creating service principal"
+    password=$(az ad sp create-for-rbac \
+      --skip-assignment \
+      --name http://$DATAEXPLORER_CLIENT_NAME \
+      --query password \
+      --output tsv)
+    echo "getting service principal"
+    appId=$(az ad sp show --id http://$DATAEXPLORER_CLIENT_NAME --query appId --output tsv)
+  fi

+  echo "storing service principal in Key Vault"
+  az keyvault secret set \
+    --vault-name $DATAEXPLORER_KEYVAULT \
+    --name $DATAEXPLORER_CLIENT_NAME-id \
+    --value "$appId" \
+    -o tsv >>log.txt
   az keyvault secret set \
     --vault-name $DATAEXPLORER_KEYVAULT \
     --name $DATAEXPLORER_CLIENT_NAME-password \
@@ -64,8 +79,8 @@ if ! kustoQuery "/v1/rest/mgmt" ".show table EventTable ingestion json mapping \"EventMapping\"" ; then
   kustoQuery "/v1/rest/mgmt" ".create table EventTable ingestion json mapping 'EventMapping' '[ { \\\"column\\\": \\\"eventId\\\", \\\"path\\\": \\\"$.eventId\\\" }, { \\\"column\\\": \\\"complexData\\\", \\\"path\\\": \\\"$.complexData\\\" }, { \\\"column\\\": \\\"value\\\", \\\"path\\\": \\\"$.value\\\" }, { \\\"column\\\": \\\"type\\\", \\\"path\\\": \\\"$.type\\\" }, { \\\"column\\\": \\\"deviceId\\\", \\\"path\\\": \\\"$.deviceId\\\" }, { \\\"column\\\": \\\"createdAt\\\", \\\"path\\\": \\\"$.createdAt\\\" } ]'"
 fi

-echo "getting service principal"
-appId=$(az ad sp show --id http://$DATAEXPLORER_CLIENT_NAME --query appId --output tsv)
+echo "getting Service Principal ID"
+appId=$(az keyvault secret show --vault-name $DATAEXPLORER_KEYVAULT -n $DATAEXPLORER_CLIENT_NAME-id --query value -o tsv)

 echo "granting service principal Data Explorer database Viewer permissions"
 MAXRETRY=60
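The grant below is retried because a freshly created service principal can take minutes to become visible for RBAC (hence the early creation at the top of the script and `MAXRETRY=60`). The loop body is collapsed in this view; here is a hypothetical sketch of such a retry, reusing the script's `kustoQuery` helper. The `.add database ... viewers` control command and the `DATAEXPLORER_DATABASE` variable are assumptions for illustration, not the file's elided code:

```bash
# Hypothetical sketch: retry the Viewer grant until the principal has propagated.
for i in $(seq 1 $MAXRETRY); do
  if kustoQuery "/v1/rest/mgmt" \
      ".add database $DATAEXPLORER_DATABASE viewers ('aadapp=$appId')"; then
    break
  fi
  echo "principal not yet available, retrying ($i/$MAXRETRY)"
  sleep 10
done
```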
2 changes: 1 addition & 1 deletion components/azure-hdinsight/create-hdinsight-kafka.sh
@@ -20,7 +20,7 @@ az hdinsight create -t kafka -g $RESOURCE_GROUP -n $HDINSIGHT_NAME \
   --version 3.6 --component-version Kafka=1.1 \
   --zookeepernode-size Standard_D2_V2 \
   --headnode-size Standard_E2_V3 \
-  --workernode-size $HDINSIGHT_WORKER_SIZE --size $HDINSIGHT_WORKERS \
+  --workernode-size $HDINSIGHT_WORKER_SIZE --workernode-count $HDINSIGHT_WORKERS \
   --workernode-data-disks-per-node 2 \
   --vnet-name $VNET_NAME --subnet ingestion-subnet \
   --cluster-configurations "$config" \
13 changes: 13 additions & 0 deletions eventhubs-databricks-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "eda1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
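Each `test_spec.json` entry describes one integration test: `stage` assigns it to one of the pipeline's pytest stages, `short` is a compact solution identifier (suitable for resource group names), `steps` selects the create-solution.sh steps to run, `minutes` is the test duration, `throughput` the generator load, and `extra_args` carries solution-specific flags such as `-k rowstore`. A rough sketch of how a runner could expand an entry into a `create-solution.sh` call (only the `-d` flag is confirmed by the pipeline below; the `-s`/`-t` mapping and the resource-group naming are assumptions for illustration):

```bash
#!/bin/bash
set -euo pipefail
# Hypothetical runner: expand the first spec entry into a deployment command.
solution=eventhubs-databricks-azuresql
entry=$(jq -c '.[0]' "$solution/test_spec.json")
rg="$RESOURCE_GROUP_PREFIX$(jq -r '.short' <<<"$entry")"  # e.g. xyzzy0x4eda1
steps=$(jq -r '.steps' <<<"$entry")                       # e.g. CIDPTMV
tput=$(jq -r '.throughput' <<<"$entry")                   # e.g. 1
extra=$(jq -r '.extra_args | join(" ")' <<<"$entry")      # e.g. -k rowstore
(cd "$solution" && ./create-solution.sh -d "$rg" -s "$steps" -t "$tput" $extra)
```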
10 changes: 10 additions & 0 deletions eventhubs-databricks-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "edc1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
10 changes: 10 additions & 0 deletions eventhubs-databricks-delta/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "1",
+    "short": "edd1",
+    "steps": "CIPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
2 changes: 1 addition & 1 deletion eventhubs-dataexplorer/create-solution.sh
@@ -14,7 +14,7 @@ usage() {
 echo " Possible values:"
 echo " C=COMMON"
 echo " I=INGESTION"
-echo " P=PROCESSING"
+echo " D=DATABASE"
 echo " T=TEST clients"
 echo " M=METRICS reporting"
 echo " V=VERIFY deployment"
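These step letters are what the `steps` strings in the test_spec.json files select. For this solution a database step replaces the processing step, since Data Explorer ingests events directly. A hypothetical full run, assuming `-s` is the flag that selects steps:

```bash
# Hypothetical invocation: deploy, ingest, database, test, metrics, verify.
./create-solution.sh -d myrg1 -s CIDTMV
```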
13 changes: 13 additions & 0 deletions eventhubs-functions-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "efa1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
13 changes: 13 additions & 0 deletions eventhubs-functions-cosmosdb/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "efc1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-f",
+      "Test0"
+    ]
+  }
+]
13 changes: 13 additions & 0 deletions eventhubs-streamanalytics-azuresql/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "2",
+    "short": "esa1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "rowstore"
+    ]
+  }
+]
10 changes: 10 additions & 0 deletions eventhubs-streamanalytics-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "esc1",
+    "steps": "CIDPTMV",
+    "minutes": "10",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
10 changes: 10 additions & 0 deletions eventhubskafka-databricks-cosmosdb/test_spec.json
@@ -0,0 +1,10 @@
+[
+  {
+    "stage": "2",
+    "short": "kdc1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": []
+  }
+]
13 changes: 13 additions & 0 deletions hdinsightkafka-databricks-sqldw/test_spec.json
@@ -0,0 +1,13 @@
+[
+  {
+    "stage": "3",
+    "short": "hdw1",
+    "steps": "CIDPTMV",
+    "minutes": "20",
+    "throughput": "1",
+    "extra_args": [
+      "-k",
+      "columnstore"
+    ]
+  }
+]
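The `stage` field is what splits the specs across the pipeline's three pytest invocations (see azure-pipelines.yml below). A small helper sketch to list which solutions run in a given stage:

```bash
# List solution folders whose test_spec.json declares a test in stage 2.
for spec in */test_spec.json; do
  if jq -e --arg s 2 '.[] | select(.stage == $s)' "$spec" >/dev/null; then
    dirname "$spec"
  fi
done
```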
54 changes: 50 additions & 4 deletions integration-tests/README.md
@@ -17,11 +17,54 @@ Since the provisioning of an Azure Databricks workspace cannot be fully
 automated at present, you must generate a PAT token of a preexisting workspace
 and supply it to the pipeline.

+## Installing the build agent
+
+As the integration tests can run for more than 6 hours, they must be run on
+self-hosted Azure DevOps agents.
+
+Create a project in Azure DevOps. Create an agent pool named "streaming-at-scale".
+
+In the Azure portal, create an Azure VM with Ubuntu 18.04 LTS.
+
+SSH to the VM and run the following commands interactively, one at a time.
+
+```bash
+# Install the Azure DevOps agent. When prompted, enter the Azure DevOps host and a PAT token with Agent Pool management permissions.
+mkdir agent
+cd agent
+wget https://vstsagentpackage.azureedge.net/agent/2.155.1/vsts-agent-linux-x64-2.155.1.tar.gz
+tar zxvf vsts-agent-linux-x64-2.155.1.tar.gz
+./config.sh
+sudo ./svc.sh install
+sudo ./svc.sh start
+# Install jq
+sudo apt update
+sudo apt install jq
+# Install zip
+sudo apt install zip
+# Install az (Azure CLI)
+curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+# Install dotnet SDK
+wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
+sudo dpkg -i packages-microsoft-prod.deb
+sudo add-apt-repository universe
+sudo apt-get install apt-transport-https
+sudo apt-get update
+sudo apt-get install dotnet-sdk-2.2
+```
+
 ## Creating the integration test pipeline in Azure DevOps

-* Create a Databricks workspace in the Azure region of your choice.
-* Create a project in Azure Pipelines.
-* In your Azure Pipelines project settings, navigate to service connection and
+* Create a Databricks workspace in the Azure region of your choice:
+  * tier: standard
+  * make sure the workspace is deployed with a custom VNET (as the HDInsight
+    Kafka setup will need to peer VNETs). The custom VNET must be named
+    'databricks-vnet'.
+  * You can use the [Databricks VNET
+    template](https://azure.microsoft.com/en-us/resources/templates/101-databricks-all-in-one-template-for-vnet-injection/),
+    changing the tier to standard on the deployment screen.
+* Install a build agent (instructions above).
+* In your Azure DevOps project settings, navigate to service connection and
   create an ARM service connection to your Azure subscription named
   'ARMConnection'. Do not restrict the connection to a particular resource
   group.
@@ -33,8 +76,8 @@ and supply it to the pipeline.
 | Variable name | Description | Required? | Example |
 | -------------------- | ---------------------------------------------- | --------- | ---------- |
 | LOCATION | Azure region in which to deploy infrastructure | required | eastus |
-| DATABRICKS_PAT_TOKEN | (secret variable) Databricks PAT token for a Databricks workspace deployed in $LOCATION | required | dapi01234567890123456789012345678901 |
+| DATABRICKS_PAT_TOKEN | (secret variable) Databricks PAT token for a Databricks workspace deployed in $LOCATION | required | dapi012345... |
+| DATABRICKS_VNET_RESOURCE_GROUP | Resource Group containing the Databricks VNET | required | streamingitests |
 | RESOURCE_GROUP_PREFIX | Prefix used to name deployed resources. Must be globally unique; use a sufficiently unique string | required | xyzzy0x4 |
+| AGENT_VM_RESOURCE_GROUP | Resource group of the build agent VM | required | streamingitests |
+| AGENT_VM_NAME | Name of the build agent VM | required | streamingbuildagent |


 ## Running the integration tests
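The variables in the table above can also be set from the command line with the azure-devops CLI extension; a sketch, where the organization, project, and pipeline ID are placeholders:

```bash
az extension add --name azure-devops
az pipelines variable create --org https://dev.azure.com/myorg --project myproject \
  --pipeline-id 1 --name LOCATION --value eastus
az pipelines variable create --org https://dev.azure.com/myorg --project myproject \
  --pipeline-id 1 --name DATABRICKS_PAT_TOKEN --secret true --value "$DATABRICKS_PAT"
```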
61 changes: 32 additions & 29 deletions integration-tests/azure-pipelines.yml
@@ -1,53 +1,56 @@
 trigger:
 - master

 jobs:

-- job: generator
+- job: start_agent
   steps:
-  - bash: echo "##vso[task.setVariable variable=TaskList;isOutput=true]$(bash integration-tests/generate-task-list.sh)"
-    name: GenerateTasks
-    displayName: Generating list of integration test tasks
+  - task: AzureCLI@1
+    inputs:
+      azureSubscription: ARMConnection
+      scriptLocation: 'inlineScript'
+      inlineScript: az vm start -g "$AGENT_VM_RESOURCE_GROUP" -n "$AGENT_VM_NAME"
+    displayName: 'start agent'

-- job: runner
-  dependsOn: generator
-  timeoutInMinutes: 180
+- job: run_tests
+  dependsOn: start_agent
+  timeoutInMinutes: 0
   pool:
-    vmImage: 'ubuntu-16.04'
-  strategy:
-    maxParallel: 1
-    matrix: $[ dependencies.generator.outputs['GenerateTasks.TaskList'] ]
+    name: streaming-at-scale
   variables:
     DATABRICKS_HOST: https://$(LOCATION).azuredatabricks.net
   steps:

-  - bash: >
-      set -e;
-      sudo apt install python3-setuptools;
-      sudo pip3 install wheel databricks-cli;
-      databricks clusters spark-versions;
-      echo "##vso[task.setVariable variable=DATABRICKS_TOKEN]$DATABRICKS_TOKEN";
+  - bash: |
+      set -e
+      sudo apt install -y python3-pip python3-setuptools
+      sudo pip3 install wheel databricks-cli
+      databricks clusters spark-versions
+      echo "##vso[task.setVariable variable=DATABRICKS_TOKEN]$DATABRICKS_TOKEN"
     displayName: Install Databricks CLI and expose token to next tasks
    env:
      DATABRICKS_TOKEN: $(DATABRICKS_PAT_TOKEN)

+  - script: |
+      pip3 install pytest pytest-azurepipelines flaky
+    displayName: 'Install test dependencies'

   - task: AzureCLI@1
-    displayName: Check RG name available
     inputs:
       azureSubscription: ARMConnection
-      scriptPath: integration-tests/check-resource-group.sh
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 1
+    displayName: 'pytest stage 1'

   - task: AzureCLI@1
-    displayName: Run test
     inputs:
       azureSubscription: ARMConnection
-      workingDirectory: $(TestDir)
-      scriptPath: $(TestDir)/create-solution.sh
-      arguments: -d $(RG_NAME) $(TestArgs)
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 2
+    displayName: 'pytest stage 2'

   - task: AzureCLI@1
-    displayName: Delete RG
     condition: always() # this step will always run, even if the pipeline is cancelled
     inputs:
       azureSubscription: ARMConnection
-      scriptPath: integration-tests/delete-resource-group.sh
+      scriptLocation: 'inlineScript'
+      inlineScript: cd integration-tests && python3 -m pytest -s --stage 3
+      # Provide service principal (for Azure Data Explorer RBAC setup)
+      addSpnToEnvironment: true
+    displayName: 'pytest stage 3'
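With `addSpnToEnvironment: true`, the AzureCLI task injects the service connection's credentials into the script's environment as `servicePrincipalId`, `servicePrincipalKey`, and `tenantId`; that is what lets create-dataexplorer.sh reuse the pipeline's principal instead of creating one. A minimal sketch of a script consuming them (the nested login is illustrative, not part of this repository):

```bash
# Runs inside an AzureCLI task that sets addSpnToEnvironment: true.
az login --service-principal \
  -u "$servicePrincipalId" \
  -p "$servicePrincipalKey" \
  --tenant "$tenantId"
```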
8 changes: 3 additions & 5 deletions integration-tests/check-resource-group.sh
@@ -2,15 +2,13 @@

 set -euo pipefail

-RG_NAME="$RESOURCE_GROUP_PREFIX$SYSTEM_JOBNAME"
+RG_NAME="$1"

-if R=$(az group show -n $RG_NAME --query 'tags.streaming_at_scale_generated' -o tsv); then
-  if [ -z "$R" ]; then
+if group_info=$(az group show -n $RG_NAME --query 'tags.streaming_at_scale_generated' -o tsv); then
+  if [ -z "$group_info" ]; then
     echo "ERROR: Resource group $RG_NAME exists, and does not have tag streaming_at_scale_generated"
     exit 1
   fi
-  echo "Deleting existing resource group $RG_NAME (as it has tag streaming_at_scale_generated)"
-  az group delete -y -g $RG_NAME
 fi

 echo "##vso[task.setVariable variable=RG_NAME;]$RG_NAME"
2 changes: 2 additions & 0 deletions integration-tests/conftest.py
@@ -0,0 +1,2 @@
+def pytest_addoption(parser):
+    parser.addoption("--stage", required=True)
9 changes: 7 additions & 2 deletions integration-tests/delete-resource-group.sh
@@ -2,7 +2,12 @@

 set -euo pipefail

+RG_NAME="$1"
+
 if [ -n "${RG_NAME:-}" ]; then
-  echo "Deleting RG $RG_NAME"
-  az group delete -y -g "$RG_NAME" --no-wait || true
+  echo "Checking if RG $RG_NAME exists"
+  if az group show -g "$RG_NAME" -o none 2>/dev/null; then
+    echo "Deleting RG $RG_NAME"
+    az group delete -y -g "$RG_NAME" --no-wait
+  fi
 fi