Skip to content
This repository has been archived by the owner on Jan 12, 2024. It is now read-only.

Commit

Permalink
Add cross region RDS cluster restore command (#137)
Browse files Browse the repository at this point in the history
* Add cross region RDS cluster restore command

* Change command name to better reflect its nature
  • Loading branch information
mayitbeegh authored and fieldju committed Apr 22, 2019
1 parent a518dae commit c442a75
Show file tree
Hide file tree
Showing 9 changed files with 323 additions and 13 deletions.
2 changes: 2 additions & 0 deletions src/main/java/com/nike/cerberus/cli/CerberusRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import com.nike.cerberus.command.core.ViewConfigCommand;
import com.nike.cerberus.command.core.WhitelistCidrForVpcAccessCommand;
import com.nike.cerberus.command.core.UpdateStackTagsCommand;
import com.nike.cerberus.command.rds.XRegionDatabaseReplicationCommand;
import com.nike.cerberus.domain.input.EnvironmentConfig;
import com.nike.cerberus.logging.LoggingConfigurer;
import com.nike.cerberus.module.CerberusModule;
Expand Down Expand Up @@ -240,6 +241,7 @@ private void registerAllCommands() {
registerCommand(new SyncConfigCommand());
registerCommand(new CreateAlbLogAthenaDbAndTableCommand());
registerCommand(new CreateCmsResourcesForRegionCommand());
registerCommand(new XRegionDatabaseReplicationCommand());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.nike.cerberus.command.rds.CreateDatabaseCommand;
import com.nike.cerberus.command.certificates.UploadCertificateFilesCommand;
import com.nike.cerberus.command.certificates.UploadCertificateFilesCommandParametersDelegate;
import com.nike.cerberus.command.rds.XRegionDatabaseReplicationCommand;
import com.nike.cerberus.domain.cloudformation.CloudFormationParametersDelegate;
import com.nike.cerberus.domain.input.EnvironmentConfig;
import com.nike.cerberus.domain.input.ManagementServiceInput;
Expand Down Expand Up @@ -173,6 +174,9 @@ public static List<String> getArgsForCommand(EnvironmentConfig environmentConfig
case CreateCmsResourcesForRegionCommand.COMMAND_NAME:
args = Arrays.asList(passedArgs);
break;
case XRegionDatabaseReplicationCommand.COMMAND_NAME:
args = Arrays.asList(passedArgs);
break;
default:
break;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright (c) 2019 Nike, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nike.cerberus.command.rds;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.nike.cerberus.command.Command;
import com.nike.cerberus.operation.Operation;
import com.nike.cerberus.operation.rds.XRegionDatabaseReplicationOperation;

import static com.nike.cerberus.command.rds.CopyRdsSnapshotsCommand.COMMAND_NAME;

/**
 * Command that restores the RDS cluster in a target region from a fresh RDS cluster snapshot
 * created in a source region.
 *
 * Both regions are mandatory command line arguments; the work itself is performed by
 * {@link XRegionDatabaseReplicationOperation}.
 */
@Parameters(
        // The constant is qualified on purpose: annotations on a class are outside the class body, so an
        // unqualified COMMAND_NAME here would resolve to the statically imported constant of another
        // command instead of the one declared below.
        commandNames = XRegionDatabaseReplicationCommand.COMMAND_NAME,
        commandDescription = "Restores RDS cluster in the target region for this environment from " +
                "a fresh RDS cluster snapshot created in the source region"
)
public class XRegionDatabaseReplicationCommand implements Command {

    /** Name under which this command is exposed on the command line. */
    public static final String COMMAND_NAME = "x-region-database-replication";

    /** Long form of the argument naming the region the snapshot is restored in. */
    public static final String TARGET_REGION_LONG_ARG = "--target-region";

    /** Long form of the argument naming the region the snapshot is created in. */
    public static final String SOURCE_REGION_LONG_ARG = "--source-region";

    @Parameter(
            names = TARGET_REGION_LONG_ARG,
            description = "The AWS Region to restore RDS cluster snapshot in",
            required = true
    )
    private String targetRegion;

    /**
     * @return the raw value passed for {@value #TARGET_REGION_LONG_ARG}
     */
    public String getTargetRegion() {
        return targetRegion;
    }

    @Parameter(
            names = SOURCE_REGION_LONG_ARG,
            description = "The AWS Region to create RDS cluster snapshot in",
            required = true
    )
    private String sourceRegion;

    /**
     * @return the raw value passed for {@value #SOURCE_REGION_LONG_ARG}
     */
    public String getSourceRegion() {
        return sourceRegion;
    }

    /**
     * @return {@link #COMMAND_NAME}
     */
    @Override
    public String getCommandName() {
        return COMMAND_NAME;
    }

    /**
     * @return the operation class that executes this command
     */
    @Override
    public Class<? extends Operation<?>> getOperationClass() {
        return XRegionDatabaseReplicationOperation.class;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ public void run(UpdateStackCommand command) {
parameters,
true,
command.isOverwriteTemplate(),
command.getCloudFormationParametersDelegate().getTags()
);
command.getCloudFormationParametersDelegate().getTags(),
false);

logger.info("Update complete.");
} catch (AmazonServiceException ase) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ public void run(UpdateStackTagsCommand command) {
parameters,
true,
false,
command.getCloudFormationParametersDelegate().getTags()
);
command.getCloudFormationParametersDelegate().getTags(),
false);

logger.info("Update complete.");
} catch (AmazonServiceException ase) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright (c) 2019 Nike, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nike.cerberus.operation.rds;

import com.amazonaws.AmazonServiceException;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.cloudformation.model.StackResourceSummary;
import com.amazonaws.services.rds.model.DBClusterSnapshot;
import com.google.common.collect.ImmutableList;
import com.nike.cerberus.command.rds.XRegionDatabaseReplicationCommand;
import com.nike.cerberus.domain.environment.Stack;
import com.nike.cerberus.operation.Operation;
import com.nike.cerberus.service.CloudFormationService;
import com.nike.cerberus.service.RdsService;
import com.nike.cerberus.store.ConfigStore;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.inject.Named;
import java.util.List;
import java.util.Map;

import static com.nike.cerberus.module.CerberusModule.ENV_NAME;
import static com.google.common.collect.MoreCollectors.onlyElement;

/**
 * Operation backing {@link XRegionDatabaseReplicationCommand}.
 *
 * Creates a fresh snapshot of the RDS cluster in the source region, copies it to the target region and
 * then updates the database stack in the target region with the copied snapshot's identifier.
 */
public class XRegionDatabaseReplicationOperation implements Operation<XRegionDatabaseReplicationCommand> {

    private final Logger log = LoggerFactory.getLogger(getClass());

    private final ConfigStore configStore;
    private final String environmentName;
    private final RdsService rdsService;
    private final CloudFormationService cloudFormationService;

    @Inject
    public XRegionDatabaseReplicationOperation(ConfigStore configStore,
                                               @Named(ENV_NAME) String environmentName,
                                               RdsService rdsService,
                                               CloudFormationService cloudFormationService) {

        this.configStore = configStore;
        this.environmentName = environmentName;
        this.rdsService = rdsService;
        this.cloudFormationService = cloudFormationService;
    }

    /**
     * Runs the replication: snapshot the source cluster, copy the snapshot across regions, update the
     * target database stack with it and finally delete the copied snapshot.
     *
     * @param command supplies the source and target region names
     */
    @Override
    public void run(XRegionDatabaseReplicationCommand command) {
        final Regions source = Regions.fromName(command.getSourceRegion());
        final Regions target = Regions.fromName(command.getTargetRegion());
        final String databaseStackName = Stack.DATABASE.getFullName(environmentName);

        final String clusterId = findSourceClusterId(source, databaseStackName);
        final DBClusterSnapshot replica = snapshotAndCopy(clusterId, source, target);

        final Map<String, String> stackParameters = buildRestoreParameters(target, databaseStackName, replica);
        restoreFromSnapshot(target, stackParameters, replica);

        rdsService.deleteSnapshot(replica, target);
    }

    /**
     * Looks up the physical id of the single "CmsDatabaseCluster" resource of the database stack.
     */
    private String findSourceClusterId(Regions source, String databaseStackName) {
        final List<StackResourceSummary> summaries =
                cloudFormationService.getStackResources(source, databaseStackName);

        // onlyElement() fails unless exactly one resource carries the expected logical id
        final StackResourceSummary cluster = summaries
                .stream()
                .filter(summary -> "CmsDatabaseCluster".equals(summary.getLogicalResourceId()))
                .collect(onlyElement());

        return cluster.getPhysicalResourceId();
    }

    /**
     * Snapshots the cluster in the source region, copies the snapshot to the target region and deletes
     * the source snapshot once the copy is available.
     *
     * @return the snapshot copy living in the target region
     */
    private DBClusterSnapshot snapshotAndCopy(String clusterId, Regions source, Regions target) {
        log.info("Preparing to create snapshot of RDS cluster in region: {}", source);
        final DBClusterSnapshot original = rdsService.createSnapshot(clusterId, source);
        rdsService.waitForSnapshotsToBecomeAvailable(original, source);

        log.info("Preparing to initiate copy of RDS DB snapshot: {} located in region: {} to region: {}",
                original.getDBClusterSnapshotIdentifier(), source.getName(), target.getName());
        final DBClusterSnapshot replica = rdsService.copySnapshot(original, source, target);
        rdsService.waitForSnapshotsToBecomeAvailable(replica, target);

        // the source snapshot is only a stepping stone for the copy
        rdsService.deleteSnapshot(original, source);
        return replica;
    }

    /**
     * Starts from the target stack's current parameters and overrides the snapshot identifier and the
     * database master password.
     */
    private Map<String, String> buildRestoreParameters(Regions target,
                                                       String databaseStackName,
                                                       DBClusterSnapshot replica) {
        final String masterPassword = configStore.getCmsDatabasePassword()
                .orElseThrow(() -> new RuntimeException("Expected the database password to exist"));

        final Map<String, String> stackParameters =
                cloudFormationService.getStackParameters(target, databaseStackName);
        stackParameters.put("snapshotIdentifier", replica.getDBClusterSnapshotIdentifier());
        stackParameters.put("cmsDbMasterPassword", masterPassword);
        return stackParameters;
    }

    /**
     * Updates the database stack in the target region and waits for completion. A CloudFormation
     * "no updates" response is logged as a warning; any other service error is rethrown.
     */
    private void restoreFromSnapshot(Regions target,
                                     Map<String, String> stackParameters,
                                     DBClusterSnapshot replica) {
        try {
            log.info("Preparing to initiate restore of RDS DB snapshot {} in region {}",
                    replica.getDBClusterSnapshotIdentifier(),
                    target);

            // iamCapabilities = true, overwrite = false, no global tags, extended waiter = true
            cloudFormationService.updateStackAndWait(
                    target,
                    Stack.DATABASE,
                    stackParameters,
                    true,
                    false,
                    null,
                    true);

            log.info("Restore complete.");
        } catch (AmazonServiceException ase) {
            final boolean nothingToUpdate = ase.getStatusCode() == 400
                    && StringUtils.equalsIgnoreCase(ase.getErrorMessage(), "No updates are to be performed.");
            if (!nothingToUpdate) {
                throw ase;
            }
            log.warn("CloudFormation reported no changes detected.");
        }
    }

    /**
     * Checks that both regions are configured for the environment and that the database stack is
     * present in the target region.
     *
     * @param command supplies the source and target region names
     * @return {@code true} when both preconditions hold, {@code false} otherwise
     */
    @Override
    public boolean isRunnable(XRegionDatabaseReplicationCommand command) {
        final Regions target = Regions.fromName(command.getTargetRegion());
        final Regions source = Regions.fromName(command.getSourceRegion());

        final ImmutableList<Regions> requested = ImmutableList.of(target, source);
        if (!configStore.getCmsRegions().containsAll(requested)) {
            log.error("The source and target regions must be configured for the environment");
            return false;
        }

        final String databaseStackName = Stack.DATABASE.getFullName(environmentName);
        if (!cloudFormationService.isStackPresent(target, databaseStackName)) {
            log.error("The Database stack must exist in the target region in order to restore snapshot");
            return false;
        }

        return true;
    }
}
32 changes: 28 additions & 4 deletions src/main/java/com/nike/cerberus/service/CloudFormationService.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
import com.nike.cerberus.ConfigConstants;
import com.nike.cerberus.domain.environment.Stack;
import com.nike.cerberus.operation.UnexpectedCloudFormationStatusException;
import com.nike.cerberus.store.ConfigStore;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
Expand All @@ -68,7 +67,6 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
Expand All @@ -91,6 +89,10 @@ public class CloudFormationService {

private final String environmentName;

private static final String WAITER_TIMEOUT_ERROR_MESSAGE = "Reached maximum attempts without transitioning to the desired state";

private static final int EXTENDED_WAITER_RETRY_COUNT = 3;

@Inject
public CloudFormationService(AwsClientFactory<AmazonCloudFormationClient> cloudFormationClientFactory,
@Named(ENV_NAME) String environmentName) {
Expand Down Expand Up @@ -198,13 +200,15 @@ private void waitAndPrintCFEvents(Regions region, String stackName, Waiter waite
* @param iamCapabilities flag for iam capabilities
* @param overwrite overwrite the deployed template with the current template in the cli
* @param globalTags map of tags to apply to all resources created/updated
* @param extended extend waiter timeout to 3 hours
*/
public void updateStackAndWait(Regions region,
Stack stack,
Map<String, String> parameters,
boolean iamCapabilities,
boolean overwrite,
Map<String, String> globalTags) {
Map<String, String> globalTags,
boolean extended) {

String stackName = stack.getFullName(environmentName);

Expand Down Expand Up @@ -236,7 +240,27 @@ public void updateStackAndWait(Regions region,
AmazonCloudFormation cloudFormationClient = cloudFormationClientFactory.getClient(region);
cloudFormationClient.updateStack(request);

waitAndPrintCFEvents(region, stackName, new AmazonCloudFormationWaiters(cloudFormationClient).stackUpdateComplete());
if (extended) {
// horrible hack because Amazon has made it basically impossible to customize waiter
int retry_counter = 0;
while (retry_counter < EXTENDED_WAITER_RETRY_COUNT) {
try {
waitAndPrintCFEvents(region, stackName, new AmazonCloudFormationWaiters(cloudFormationClient).stackUpdateComplete());
break;
} catch (UnexpectedCloudFormationStatusException e) {
if (e.getMessage().contains(WAITER_TIMEOUT_ERROR_MESSAGE) && retry_counter < EXTENDED_WAITER_RETRY_COUNT - 1) {
retry_counter ++;
continue;
} else {
// throw when it's not timeout or if it's the last retry
throw e;
}
}
}
} else {
waitAndPrintCFEvents(region, stackName, new AmazonCloudFormationWaiters(cloudFormationClient).stackUpdateComplete());
}


}

Expand Down
Loading

0 comments on commit c442a75

Please sign in to comment.