Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shared: exit on refresh error #50

Merged
merged 4 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package edu.utexas.tacc.tapis.shared.exceptions.runtime;

import java.io.Serial;
/**
 * Exception thrown when errors prevent refreshing the Service JWT. Program will exit.
 */
public class TapisJWTExpirationException
    extends TapisRuntimeException
{
    @Serial
    private static final long serialVersionUID = -4517149013759206472L;

    /**
     * Creates the exception with a descriptive message.
     *
     * @param message text handed to the superclass constructor
     */
    public TapisJWTExpirationException(String message)
    {
        super(message);
    }

    /**
     * Creates the exception with a descriptive message and the underlying cause.
     *
     * @param message text handed to the superclass constructor
     * @param cause   throwable handed to the superclass constructor
     */
    public TapisJWTExpirationException(String message, Throwable cause)
    {
        super(message, cause);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,31 @@

import edu.utexas.tacc.tapis.client.shared.exceptions.TapisClientException;
import edu.utexas.tacc.tapis.shared.exceptions.TapisException;
import edu.utexas.tacc.tapis.shared.exceptions.runtime.TapisJWTExpirationException;
import edu.utexas.tacc.tapis.shared.i18n.MsgUtils;
import edu.utexas.tacc.tapis.shared.utils.TapisGsonUtils;
import edu.utexas.tacc.tapis.tokens.client.TokensClient;
import edu.utexas.tacc.tapis.tokens.client.gen.model.InlineObject1.AccountTypeEnum;
import edu.utexas.tacc.tapis.tokens.client.model.CreateTokenParms;
import edu.utexas.tacc.tapis.tokens.client.model.RefreshTokenParms;
import edu.utexas.tacc.tapis.tokens.client.model.TapisRefreshToken;
import edu.utexas.tacc.tapis.tokens.client.model.TokenResponsePackage;

/** This class fetches a single service token and will refresh that token indefinitely.
/**
* This class fetches a single service token and will refresh that token indefinitely.
* Services use the getAccessJWT() method to get the currently valid access JWT in
* serialized form. They can also call hasExpiredAccessToken() to determine whether
* this class instance should be discarded because the access token can no longer be
* used.
* serialized form. They can also call hasExpiredAccessToken() to determine whether this
* class instance should be discarded because the access token can no longer be used.
*
* The parameters passed to this class on construction cannot be changed, but any
* number of instances can be created for the same service. The service password is
* number of instances can be created for the same service. The service password is
* used only to acquire a new token from the Tokens service during construction and
* is not saved in this class's instances. If the constructor returns without
* is not saved in this class's instances. If the constructor returns without
* throwing an exception, then a newly minted access token has been received from
* the Tokens service and is ready for use.
*
* The default access and refresh token lifetimes are set in the ServiceJWTParms
* class by default, but can be overridden. The refresh time-to-live must be at least
* class by default, but can be overridden. The refresh time-to-live must be at least
* as long as the access token's, but making it much longer is of no use since no
* attempts to refresh the access token occur after the access token expires.
*
Expand Down Expand Up @@ -177,12 +179,11 @@ public long getAccessExpiresIn(String targetSite) {
/* **************************************************************************** */
/* Private Accessors */
/* **************************************************************************** */
// Refresh token string taken from the token package stored for the
// target site. Neither the package nor its refresh token is null-checked.
private String getRefreshJWT(String targetSite) {
    var sitePkg = _tokPkgMap.get(targetSite);
    var refreshTok = sitePkg.getRefreshToken();
    return refreshTok.getRefreshToken();
}

// Refresh token object taken from the token package stored for the target site.
// NOTE(review): the earlier comment asserted this is never null -- confirm;
// no null check is performed here.
private TapisRefreshToken getRefreshJWTObject(String targetSite) {
    var sitePkg = _tokPkgMap.get(targetSite);
    return sitePkg.getRefreshToken();
}

/* **************************************************************************** */
/* Public Methods */
/* **************************************************************************** */
Expand Down Expand Up @@ -339,12 +340,34 @@ private TokenResponsePackage createServiceJWT(String password, String targetSite
/* ---------------------------------------------------------------------------- */
/* refreshServiceJWT: */
/* ---------------------------------------------------------------------------- */
/**
* Refresh the service JWT using the refresh token.
* The TapisJWTExpirationException runtime exception is only thrown when the
* refresh token has expired before renewing the service's JWT. In this case,
* there's no point in this thread continuing since there's no way to renew the
* service JWT, so the runtime exception always gets processed by uncaughtException().
*
* @param targetSite the site where the tokens can be used
* @return the refreshed tokens
* @throws TapisException recoverable error
* @throws TapisClientException recoverable error
* @throws TapisJWTExpirationException unrecoverable error
*/
private TokenResponsePackage refreshServiceJWT(String targetSite)
throws TapisException, TapisClientException
throws TapisException, TapisClientException, TapisJWTExpirationException
{
// See if we still have a valid, unexpired refresh token.
var refreshJWT = getRefreshJWTObject(targetSite);
if (refreshJWT.getExpiresIn() < 0) {
// Create the expired refresh token message for later logging.
String msg = MsgUtils.getMsg("TAPIS_TOKEN_REFRESH_JWT_EXPIRED", targetSite, _serviceName,
refreshJWT.getExpiresAt(), Instant.now());
_log.error(msg);
throw new TapisJWTExpirationException(msg);
}

// Create and populate the client parameter object.
var refreshParms = new RefreshTokenParms();
refreshParms.setRefreshToken(getRefreshJWT(targetSite));
refreshParms.setRefreshToken(refreshJWT.getRefreshToken());

// Get the client.
var client = new TokensClient(_tokensBaseUrl);
Expand Down Expand Up @@ -497,17 +520,24 @@ private void startTokenRefreshThread()
/* ---------------------------------------------------------------------- */
/* uncaughtException: */
/* ---------------------------------------------------------------------- */
/**
 * Note the unexpected death of our refresh thread.
 * Abort program since it has become impossible to refresh our service token.
 *
 * A TapisJWTExpirationException is logged with the dedicated abort message.
 * Any other throwable is logged with the thread name and its string form,
 * and its stack trace is also written to stderr. In both cases the JVM is
 * then terminated with exit status 1.
 *
 * @param t the thread that terminated
 * @param e the throwable that escaped the thread
 */
@Override
public void uncaughtException(Thread t, Throwable e)
{
    // Record the error exactly once, choosing the message by failure type.
    if (e instanceof TapisJWTExpirationException)
    {
        _log.error(MsgUtils.getMsg("TAPIS_ABORT_JWT_REFRESH_ERROR", _serviceName));
    }
    else
    {
        _log.error(MsgUtils.getMsg("TAPIS_THREAD_UNCAUGHT_EXCEPTION", t.getName(), e.toString()));
        e.printStackTrace(); // stderr for emphasis
    }

    // The refresh thread is gone, so the service token can no longer be renewed.
    System.exit(1);
}

/* **************************************************************************** */
Expand Down Expand Up @@ -571,8 +601,9 @@ public void run()
* @param sleepMillis milliseconds to wait before trying to refresh
* @return true if all new access tokens were acquired before the current
* access tokens expired, false otherwise
* @throws TapisJWTExpirationException when the refresh and access tokens have expired
*/
private boolean refreshToken(long sleepMillis)
private boolean refreshToken(long sleepMillis) throws TapisJWTExpirationException
{
// Retry until the access token expires.
while (true) {
Expand Down Expand Up @@ -623,15 +654,19 @@ private boolean refreshToken(long sleepMillis)
localTokPkgMap.put(siteId, refreshServiceJWT(siteId));
_refreshJwtCount++;
}
// Rethrowing this exception will cause it to escape the
// refresh thread's run method. The thread's uncaught exception
// method will then log an error and shutdown the JVM.
catch (TapisJWTExpirationException e) {throw e;}
catch (Exception e) {
// Log the exception.
String msg = MsgUtils.getMsg("TAPIS_TOKEN_REFRESH_ERROR",
_serviceName, _tenant,
Thread.currentThread().getName(),
sleepMillis);
_log.error(msg, e);
refreshFailed = true;
_refreshJwtFailedCount++;
_log.error(msg, e);
refreshFailed = true;
_refreshJwtFailedCount++;
}
}

Expand Down Expand Up @@ -694,7 +729,8 @@ private long calculateNewTokenWaitMillis()
/* ---------------------------------------------------------------------- */
/* calculateRetryMillis: */
/* ---------------------------------------------------------------------- */
/** After the first attempt at a refresh fails, we retry every retry_millis
/**
* After the first attempt at a refresh fails, we retry every retry_millis
* (or less if the access token would expire before retry_millis) until the
* access token expires.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ TAPIS_TOKEN_REFRESH_TIMEOUT=TAPIS_TOKEN_REFRESH_TIMEOUT Token refresh thread {2}
TAPIS_TOKEN_REFRESH_ERROR=TAPIS_TOKEN_REFRESH_ERROR Token refresh failed on thread {2} for {0}@{1} after waiting {3} milliseconds.
# 0 = user, 1 = tenant, 2 = thread name, 3 = sleep millis
TAPIS_TOKEN_REFRESH_WAIT=TAPIS_TOKEN_REFRESH_WAIT Thread {2} waiting {3} milliseconds before refreshing token for {0}@{1}.
# 0 = target site, 1 = service name, 2 = refresh token expiry time (UTC), 3 = current time
TAPIS_TOKEN_REFRESH_JWT_EXPIRED=TAPIS_TOKEN_REFRESH_JWT_EXPIRED Unable to renew service token. Refresh token expired. Site: {0} Service: {1} Refresh token expiry time (UTC): {2} Current time: {3}
# 0 = service name
TAPIS_ABORT_JWT_REFRESH_ERROR=TAPIS_ABORT_JWT_REFRESH_ERROR {0} service aborting due to a runtime service JWT refresh error.

# 0 = jwt user, 1 = jwt tenant, 2 = obo header tenant
TAPIS_SECURITY_ALLOWABLE_TENANT_ERROR=TAPIS_SECURITY_ALLOWABLE_TENANT_ERROR Unable to confirm that service {0} in tenant {1} can act on behalf of users in tenant {2}.
Expand Down