Skip to content

Commit

Permalink
MET-6211 Remove deprecations in metis indexing and cleanup (#698)
Browse files Browse the repository at this point in the history
* MET-6211 Remove deprecations in metis indexing and cleanup

* MET-6211 Process review
  • Loading branch information
stzanakis authored Oct 11, 2024
1 parent f14e972 commit 83709cc
Show file tree
Hide file tree
Showing 11 changed files with 215 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.authentication.rest.controller;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.authentication.service.AuthenticationService;
import eu.europeana.metis.authentication.user.AccountRole;
Expand Down Expand Up @@ -181,8 +181,7 @@ public void deleteUser(@RequestHeader("Authorization") String authorization,
}
authenticationService.deleteUser(emailParameter.getEmail());
if (LOGGER.isInfoEnabled()) {
LOGGER.info("User with email: {} deleted",
CRLF_PATTERN.matcher(emailParameter.getEmail()).replaceAll(""));
LOGGER.info("User with email: {} deleted", sanitizeCRLF(emailParameter.getEmail()));
}
}

Expand All @@ -208,15 +207,13 @@ public void updateUserToMakeAdmin(@RequestHeader("Authorization") String authori
if (emailParameter == null || StringUtils.isBlank(emailParameter.getEmail())) {
throw new BadContentException("userEmailToMakeAdmin is empty");
}
String accessToken = authenticationService
.validateAuthorizationHeaderWithAccessToken(authorization);
final String accessToken = authenticationService.validateAuthorizationHeaderWithAccessToken(authorization);
if (!authenticationService.isUserAdmin(accessToken)) {
throw new UserUnauthorizedException(ACTION_NOT_ALLOWED_FOR_USER);
}
authenticationService.updateUserMakeAdmin(emailParameter.getEmail());
if (LOGGER.isInfoEnabled()) {
LOGGER.info("User with email: {} made admin",
CRLF_PATTERN.matcher(emailParameter.getEmail()).replaceAll(""));
LOGGER.info("User with email: {} made admin", sanitizeCRLF(emailParameter.getEmail()));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,19 @@ public final class CommonStringValues {

public static final String REPLACEABLE_CRLF_CHARACTERS_REGEX = "[\r\n\t]";

public static final Pattern CRLF_PATTERN = Pattern.compile(CommonStringValues.REPLACEABLE_CRLF_CHARACTERS_REGEX);
public static final Pattern CRLF_PATTERN = Pattern.compile(REPLACEABLE_CRLF_CHARACTERS_REGEX);

private CommonStringValues() {
}

/**
 * Sanitizes a value against log injection attacks (javasecurity:S5145) before it is written to a log.
 * <p>Every character matched by {@link #CRLF_PATTERN} (carriage return, line feed and tab) is removed, i.e. replaced
 * with the empty string.</p>
 *
 * @param input the value to sanitize, may be {@code null}
 * @return the input without the matched characters, or {@code null} if the input was {@code null}
 */
public static String sanitizeCRLF(String input) {
  // Nothing to sanitize: pass null straight through instead of failing on it.
  if (input == null) {
    return null;
  }
  return CRLF_PATTERN.matcher(input).replaceAll("");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,39 @@

/**
* Enum for depublication reason.
* <p>Note: The enum value {@link #UNKNOWN} is to be used for historical depublication workflows (from before the reason was
* implemented).
* In other words, the historical workflows will be populated once by a script with the {@link #UNKNOWN} reason, and this value
* should never be used for depublications performed since that release. Therefore its url suffix is an empty string (its url is
* just the base vocabulary url) and it is not meant to be used for populating records in the database (e.g. tombstoning).</p>
*/
public enum DepublicationReason {

BROKEN_MEDIA_LINKS("Broken media links", "http://data.europeana.eu/vocabulary/depublicationReason/contentTier0"),
GDPR("GDPR", "http://data.europeana.eu/vocabulary/depublicationReason/gdpr"),
PERMISSION_ISSUES("Permission issues", "http://data.europeana.eu/vocabulary/depublicationReason/noPermission"),
SENSITIVE_CONTENT("Sensitive content", "http://data.europeana.eu/vocabulary/depublicationReason/sensitiveContent"),
REMOVED_DATA_AT_SOURCE("Removed data at source", "http://data.europeana.eu/vocabulary/depublicationReason/sourceRemoval"),
GENERIC("Generic", "http://data.europeana.eu/vocabulary/depublicationReason/generic"),
UNKNOWN("Unknown", "http://data.europeana.eu/vocabulary/depublicationReason/unknown");
BROKEN_MEDIA_LINKS("Broken media links", "contentTier0"),
GDPR("GDPR", "gdpr"),
PERMISSION_ISSUES("Permission issues", "noPermission"),
SENSITIVE_CONTENT("Sensitive content", "sensitiveContent"),
REMOVED_DATA_AT_SOURCE("Removed data at source", "sourceRemoval"),
GENERIC("Generic", "generic"),
UNKNOWN("Unknown", "");

private final String valueAsString;
private static final String BASE_URL = "http://data.europeana.eu/vocabulary/depublicationReason/";

private final String title;
private final String url;

DepublicationReason(String valueAsString, String url) {
this.valueAsString = valueAsString;
this.url = url;
DepublicationReason(String title, String urlSuffix) {
this.title = title;
this.url = BASE_URL + urlSuffix;
}

@Override
public String toString(){
return valueAsString;
public String toString() {
return title;
}

public String getTitle() {
return title;
}

public String getUrl() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package eu.europeana.metis.utils;

import static eu.europeana.metis.utils.CommonStringValues.BATCH_OF_DATASETS_RETURNED;
import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_FOR_REQUEST_PARAM;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_FOR_SCHEDULING;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_Z;
import static eu.europeana.metis.utils.CommonStringValues.EUROPEANA_ID_CREATOR_INITIALIZATION_FAILED;
import static eu.europeana.metis.utils.CommonStringValues.NEXT_PAGE_CANNOT_BE_NEGATIVE;
import static eu.europeana.metis.utils.CommonStringValues.PAGE_COUNT_CANNOT_BE_ZERO_OR_NEGATIVE;
import static eu.europeana.metis.utils.CommonStringValues.PLUGIN_EXECUTION_NOT_ALLOWED;
import static eu.europeana.metis.utils.CommonStringValues.REPLACEABLE_CRLF_CHARACTERS_REGEX;
import static eu.europeana.metis.utils.CommonStringValues.S_DATA_PROVIDERS_S_DATA_SETS_S_TEMPLATE;
import static eu.europeana.metis.utils.CommonStringValues.UNAUTHORIZED;
import static eu.europeana.metis.utils.CommonStringValues.WRONG_ACCESS_TOKEN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.util.regex.Pattern;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link CommonStringValues}: the shared constants and the {@code sanitizeCRLF} helper.
 */
class CommonStringValuesTest {

  /**
   * References every statically imported constant and checks that each one is initialized.
   */
  @Test
  void testFieldsAreUsed() {
    assertNotNull(WRONG_ACCESS_TOKEN);
    assertNotNull(BATCH_OF_DATASETS_RETURNED);
    assertNotNull(NEXT_PAGE_CANNOT_BE_NEGATIVE);
    assertNotNull(PAGE_COUNT_CANNOT_BE_ZERO_OR_NEGATIVE);
    assertNotNull(PLUGIN_EXECUTION_NOT_ALLOWED);
    assertNotNull(UNAUTHORIZED);
    assertNotNull(EUROPEANA_ID_CREATOR_INITIALIZATION_FAILED);
    assertNotNull(DATE_FORMAT);
    assertNotNull(DATE_FORMAT_Z);
    assertNotNull(DATE_FORMAT_FOR_SCHEDULING);
    assertNotNull(DATE_FORMAT_FOR_REQUEST_PARAM);
    assertNotNull(S_DATA_PROVIDERS_S_DATA_SETS_S_TEMPLATE);
    assertNotNull(REPLACEABLE_CRLF_CHARACTERS_REGEX);
    assertNotNull(CRLF_PATTERN);
  }

  /**
   * The compiled pattern must be built from the carriage return / line feed / tab character class.
   */
  @Test
  void testPattern() {
    final String expectedRegex = Pattern.compile("[\r\n\t]").pattern();
    assertEquals(expectedRegex, CRLF_PATTERN.pattern());
  }

  /**
   * A {@code null} input is passed through as {@code null}.
   */
  @Test
  void testSanitizeCRLF_NullInput() {
    assertNull(sanitizeCRLF(null));
  }

  /**
   * An empty input stays empty.
   * Named after the method under test, like the other {@code testSanitizeCRLF_*} cases.
   */
  @Test
  void testSanitizeCRLF_EmptyString() {
    assertEquals("", sanitizeCRLF(""));
  }

  /**
   * Text without any replaceable character is returned unchanged.
   */
  @Test
  void testSanitizeCRLF_NoSpecialCharacters() {
    final String plainText = "This is a test.";
    assertEquals(plainText, sanitizeCRLF(plainText));
  }

  /**
   * Line feeds, carriage returns and tabs inside the text are removed without leaving a replacement.
   */
  @Test
  void testSanitizeCRLF_WithCRLFCharacters() {
    final String sanitized =
        sanitizeCRLF("This is a test.\nThis is a new line.\rThis is a carriage return.\tThis is a tab.");
    assertEquals("This is a test.This is a new line.This is a carriage return.This is a tab.", sanitized);
  }

  /**
   * Replaceable characters at the start and at the end of the text are removed as well.
   */
  @Test
  void testSanitizeCRLF_MixedInput() {
    final String sanitized = sanitizeCRLF("\r\n\tThis string has special characters at the start.\r\n");
    assertEquals("This string has special characters at the start.", sanitized);
  }

  /**
   * A regular sentence without CR, LF or tab characters is returned unchanged.
   */
  @Test
  void testSanitizeCRLF_NoCRLFCharacters() {
    final String plainText = "Regular string without CRLF.";
    assertEquals(plainText, sanitizeCRLF(plainText));
  }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package eu.europeana.metis.utils;

import static eu.europeana.metis.utils.DepublicationReason.BROKEN_MEDIA_LINKS;
import static eu.europeana.metis.utils.DepublicationReason.GDPR;
import static eu.europeana.metis.utils.DepublicationReason.GENERIC;
import static eu.europeana.metis.utils.DepublicationReason.PERMISSION_ISSUES;
import static eu.europeana.metis.utils.DepublicationReason.REMOVED_DATA_AT_SOURCE;
import static eu.europeana.metis.utils.DepublicationReason.SENSITIVE_CONTENT;
import static eu.europeana.metis.utils.DepublicationReason.UNKNOWN;
import static eu.europeana.metis.utils.DepublicationReason.values;
import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the {@link DepublicationReason} enum.
 */
class DepublicationReasonTest {

  /**
   * Every reason must expose a non-null title and a non-null url.
   */
  @Test
  void testValues() {
    Arrays.stream(values()).forEach(DepublicationReasonTest::assertTitleAndUrlPresent);
  }

  /**
   * {@code toString()} must return the human readable title of each reason.
   */
  @Test
  void testToStringMethod() {
    assertEquals("Broken media links", BROKEN_MEDIA_LINKS.toString());
    assertEquals("GDPR", GDPR.toString());
    assertEquals("Permission issues", PERMISSION_ISSUES.toString());
    assertEquals("Sensitive content", SENSITIVE_CONTENT.toString());
    assertEquals("Removed data at source", REMOVED_DATA_AT_SOURCE.toString());
    assertEquals("Generic", GENERIC.toString());
    assertEquals("Unknown", UNKNOWN.toString());
  }

  /**
   * The enum must consist of exactly the seven known reasons.
   */
  @Test
  void testEnumValuePresence() {
    final List<DepublicationReason> actualReasons = asList(values());
    assertEquals(7, actualReasons.size());

    final List<DepublicationReason> expectedReasons = asList(BROKEN_MEDIA_LINKS, GDPR, PERMISSION_ISSUES,
        SENSITIVE_CONTENT, REMOVED_DATA_AT_SOURCE, GENERIC, UNKNOWN);
    for (DepublicationReason expectedReason : expectedReasons) {
      assertTrue(actualReasons.contains(expectedReason));
    }
  }

  // Checks the title first and the url second for a single reason.
  private static void assertTitleAndUrlPresent(DepublicationReason reason) {
    assertNotNull(reason.getTitle());
    assertNotNull(reason.getUrl());
  }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.exception.BadContentException;
import java.io.ByteArrayInputStream;
Expand Down Expand Up @@ -105,8 +105,7 @@ private Optional<String> getValidatedXml(String resourceId, String xml) throws B
if (isEmptyXml(xml)) {
xmlResponse = Optional.empty();
if (LOGGER.isInfoEnabled()) {
LOGGER.info("Transformed entity {} results to an empty XML.",
CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("Transformed entity {} results to an empty XML.", sanitizeCRLF(resourceId));
}
} else {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference.service;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.enrichment.api.external.DereferenceResultStatus;
import eu.europeana.enrichment.api.external.model.Concept;
Expand Down Expand Up @@ -284,7 +284,7 @@ private OriginalEntity retrieveOriginalEntity(String resourceId, Set<String> pot

// Evaluate and return the result.
if (originalEntity == null && LOGGER.isInfoEnabled()) {
LOGGER.info("No entity XML for uri {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("No entity XML for uri {}", sanitizeCRLF(resourceId));
}
final DereferenceResultStatus dereferenceResultStatus = originalEntity == null ?
DereferenceResultStatus.NO_ENTITY_FOR_VOCABULARY : DereferenceResultStatus.SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference.service.utils;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.dereference.Vocabulary;
import java.net.URI;
Expand Down Expand Up @@ -71,10 +71,10 @@ public static VocabularyCandidates findVocabulariesForUrl(String resourceId,

// Log and done.
if (candidates.isEmpty() && (LOGGER.isInfoEnabled())) {
LOGGER.info("No vocabularies found for uri {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("No vocabularies found for uri {}", sanitizeCRLF(resourceId));
}
if (candidates.size() > 1 && LOGGER.isWarnEnabled()) {
LOGGER.warn("Multiple vocabularies found for uri {}: {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""),
LOGGER.warn("Multiple vocabularies found for uri {}: {}", sanitizeCRLF(resourceId),
candidates.stream().map(Vocabulary::getName).collect(Collectors.joining(", ")));
}
return new VocabularyCandidates(candidates);
Expand Down
30 changes: 9 additions & 21 deletions metis-indexing/src/main/java/eu/europeana/indexing/Indexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@ public interface Indexer extends Closeable {

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single record, given as an RDF object, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index.
* @param rdf The rdf to index.
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void indexRdf(RDF record, IndexingProperties indexingProperties) throws IndexingException;
void indexRdf(RDF rdf, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand All @@ -60,18 +60,18 @@ public interface Indexer extends Closeable {

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single record, given as an RDF string, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index (can be parsed to RDF).
* @param rdfString The rdfString to index (can be parsed to RDF).
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void index(String record, IndexingProperties indexingProperties) throws IndexingException;
void index(String rdfString, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand Down Expand Up @@ -110,18 +110,18 @@ void index(String stringRdfRecord, IndexingProperties indexingProperties,

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single record, read from an RDF input stream, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index (can be parsed to RDF).
* @param rdfInputStream The rdfInputStream to index (can be parsed to RDF).
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void index(InputStream record, IndexingProperties indexingProperties) throws IndexingException;
void index(InputStream rdfInputStream, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand Down Expand Up @@ -188,18 +188,6 @@ TierResults indexAndGetTierCalculations(InputStream recordContent,
*/
FullBeanImpl getTombstone(String rdfAbout);

/**
* Creates and indexes a tombstone record.
*
* @param rdfAbout the id of the record
* @return whether a record was tombstoned
* @throws IndexingException in case something went wrong.
* @deprecated Use {@link #indexTombstone(String, DepublicationReason)}.
*/
//TODO: 2024-09-24 - Remove once ecloud has updated the code for tombstoning
@Deprecated(since = "13-SNAPSHOT", forRemoval = true)
boolean indexTombstone(String rdfAbout) throws IndexingException;

/**
* Creates and indexes a tombstone record.
*
Expand Down
Loading

0 comments on commit 83709cc

Please sign in to comment.