Skip to content

Commit

Permalink
Merge pull request #54 from europeana/EA-3852-verify-size-of-translation
Browse files Browse the repository at this point in the history
eTransl text size limit to 5000, after that use a document-based
  • Loading branch information
gsergiu authored Jun 4, 2024
2 parents 96aaf76 + dc8527e commit 2551e1c
Show file tree
Hide file tree
Showing 9 changed files with 363 additions and 128 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ public class RedisMessageListener implements MessageListener {

private static final Logger LOGGER = LogManager.getLogger(RedisMessageListener.class);
private String message;
//if true, the message received will be a document (e.g. from the eTranslation), otherwise a text-snippet
private boolean messageAsDocument;

/**
 * Creates a listener configured for one of the two kinds of incoming messages.
 *
 * @param messageAsDocument {@code true} if the received message will be a document
 *        (e.g. from the eTranslation), {@code false} if it will be a text snippet
 */
public RedisMessageListener(boolean messageAsDocument) {
  // the implicit superclass constructor call is sufficient here
  this.messageAsDocument = messageAsDocument;
}

@Override
public void onMessage(Message message, byte[] pattern) {
synchronized(this) {
Expand All @@ -26,15 +33,20 @@ public void onMessage(Message message, byte[] pattern) {
this.message=messageBody;
}
else {
/*
* the received message is treated as a json object and we need some adjustments for the escaped characters
* (this only applies if we get the translated text from the translated-text field in the eTransl callback,
* which happens if we send the text to be translated in the textToTranslate request param)
*/
//remove double quotes at the beginning and at the end of the response, from some reason they are duplicated
String messageRemDuplQuotes = messageBody.replaceAll("^\"|\"$", "");
//replace a double backslash with a single backslash
this.message = messageRemDuplQuotes.replace("\\n", "\n");
if(messageAsDocument) {
this.message = messageBody;
}
else {
/*
* the received message is treated as a json object and we need some adjustments for the escaped characters
* (this only applies if we get the translated text from the translated-text field in the eTransl callback,
* which happens if we send the text to be translated in the textToTranslate request param)
*/
//remove double quotes at the beginning and at the end of the response, for some reason they are duplicated
String messageRemDuplQuotes = messageBody.replaceAll("^\"|\"$", "");
//replace the escaped newline sequence (a backslash followed by 'n') with an actual newline character
this.message = messageRemDuplQuotes.replace("\\n", "\n");
}
}

//notify all threads waiting on this object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ static void setProperties(DynamicPropertyRegistry registry) {
registry.add("redis.connection.url", () -> "redis://localhost:" + redisPort + "/");
registry.add("translation.eTranslation.baseUrl", () -> ETranslationTranslationService.baseUrlTests);
registry.add("translation.eTranslation.credentials", () -> "");
registry.add("translation.eTranslation.truncate", () -> false);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public abstract class IntegrationTestUtils {
public static final String TRANSLATION_PANGEANIC_RESPONSE_2 = "/content/pangeanic/translate/translate_pangeanic_response_2.json";

public static final String TRANSLATION_REQUEST_E_TRANSLATION = "/content/translation_request_eTranslation.json";
public static final String TRANSLATION_REQUEST_E_TRANSLATION_LONGER = "/content/translation_request_eTranslation_longer.json";

public static final String TRANSLATION_REQUEST_PANGEANIC_MULTIPLE_LANG = "/content/translation_pangeanic_multiple_languages_request.json";
public static final String TRANSLATION_PANGEANIC_REQUEST_MULTIPLE_LANG_DE = "/content/pangeanic/translate/translate_pangeanic_multiple_languages_request_DE.json";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ void translationPangeanic() throws Exception {
assertNotNull(serviceFieldValue);
}

class eTranslationSimulatorThread implements Runnable {
class eTranslationSimulatorThreadForTextSnippetTranslation implements Runnable {
private MockMvc mockMvc;
public eTranslationSimulatorThread(MockMvc mockMvc) {
public eTranslationSimulatorThreadForTextSnippetTranslation(MockMvc mockMvc) {
this.mockMvc = mockMvc;
}
@Override
Expand Down Expand Up @@ -156,15 +156,54 @@ public void run() {
}
}
}


/**
 * Runnable that posts the longer eTranslation request
 * ({@code TRANSLATION_REQUEST_E_TRANSLATION_LONGER}) to the translate endpoint and
 * checks the language, translations and service fields of the JSON response.
 * It is meant to run on its own thread while the test triggers the eTranslation callback.
 */
class eTranslationSimulatorThreadForDocumentTranslation implements Runnable {
  private final MockMvc mockMvc;

  public eTranslationSimulatorThreadForDocumentTranslation(MockMvc mockMvc) {
    this.mockMvc = mockMvc;
  }

  @Override
  public void run() {
    try {
      String requestJson = getJsonStringInput(TRANSLATION_REQUEST_E_TRANSLATION_LONGER);
      String result = mockMvc
          .perform(
              post(BASE_URL_TRANSLATE)
                .header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE)
                .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
                .content(requestJson))
          .andExpect(status().isOk())
          .andReturn().getResponse().getContentAsString();

      assertNotNull(result);
      JSONObject json = new JSONObject(result);
      String langFieldValue = json.getString(TranslationAppConstants.LANG);
      assertEquals(LANGUAGE_EN, langFieldValue);

      // NOTE(review): a singleton list holds exactly one string, so requiring it to contain
      // four different strings cannot hold as written - confirm whether a substring check on
      // the raw value or a lookup in the JSON array was intended.
      List<String> translations = Collections.singletonList(json.getString(TranslationAppConstants.TRANSLATIONS));
      assertTrue(translations.contains("test first line in German, eight on caching, no cache 1!")
          && translations.contains("a second text in German, can be cached....")
          && translations.contains("a third text in German, such as this")
          && translations.contains("and a fourth text such as: today’s news on www.heute.at"));
      String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE);
      assertNotNull(serviceFieldValue);
    } catch (Exception e) {
      // Do not swallow the failure: rethrow with the cause so that it is at least reported
      // by the thread's uncaught-exception handling instead of disappearing silently.
      throw new IllegalStateException("eTranslation document translation simulation failed", e);
    }
  }
}

/*
* eTranslation that uses a text snippet based translation
*/
@Test
void translationETranslation() throws Exception {
Thread thread = new Thread(new eTranslationSimulatorThread(mockMvc));
void translationETranslationTextSnippet() throws Exception {
Thread thread = new Thread(new eTranslationSimulatorThreadForTextSnippetTranslation(mockMvc));
thread.start();
Thread.sleep(1000);
//trigger the eTranslation callback manually
//computed in advance using the code in the eTransl service
String eTranslRef="et:deen0E3jxg";
String eTranslRef="et:deenre7d+w";
StringBuilder translatedText=new StringBuilder();
translatedText.append("That is my dog.");
translatedText.append(ETranslationTranslationService.markupDelimiter);
Expand All @@ -174,13 +213,42 @@ void translationETranslation() throws Exception {
.perform(
post(ETranslationTranslationService.eTranslationCallbackRelativeUrl).characterEncoding(StandardCharsets.UTF_8)
.param("external-reference", eTranslRef)
.param("request-id", "1")
.param("translated-text", translatedText.toString()))
.andExpect(status().isOk());

thread.join();

}

/**
 * eTranslation that uses a document based translation: the translate request runs on a
 * separate simulator thread while this test delivers the eTranslation callback, with the
 * translated document sent in the request body.
 */
@Test
void translationETranslationDocument() throws Exception {
  Thread simulator = new Thread(new eTranslationSimulatorThreadForDocumentTranslation(mockMvc));
  simulator.start();
  // presumably gives the simulator thread time to submit its request before the callback - confirm
  Thread.sleep(1000);

  // the callback is triggered manually; the external reference was
  // computed in advance using the code in the eTransl service
  final String externalReference = "et:deen9Aie9A";
  // base64 encoded translations
  final String encodedTranslations = "dGVzdCBmaXJzdCBsaW5lIGluIEdlcm1hbiwgZWlnaHQgb24gY2FjaGluZywgbm8gY2FjaGUgMSEKW25vdHJhbnNsYXRlXWRlZW5QVnNhT2dbL25vdHJhbnNsYXRlXQphIHNlY29uZCB0ZXh0IGluIEdlcm1hbiwgY2FuIGJlIGNhY2hlZC4uLi4KW25vdHJhbnNsYXRlXWRlZW5QVnNhT2dbL25vdHJhbnNsYXRlXQphIHRoaXJkIHRleHQgaW4gR2VybWFuLCBzdWNoIGFzIHRoaXMKW25vdHJhbnNsYXRlXWRlZW5QVnNhT2dbL25vdHJhbnNsYXRlXQphbmQgYSBmb3VydGggdGV4dCBzdWNoIGFzOiB0b2RheeKAmXMgbmV3cyBvbiB3d3cuaGV1dGUuYXQ=";

  mockMvc
      .perform(
          post(ETranslationTranslationService.eTranslationCallbackRelativeUrl)
              .characterEncoding(StandardCharsets.UTF_8)
              .param("external-reference", externalReference)
              .param("request-id", "1")
              .content(encodedTranslations))
      .andExpect(status().isOk());

  // wait until the simulated translate request has finished
  simulator.join();
}



@Test
void translationPangeanicNoSrcMultipleLanguages() throws Exception {
String requestJson = getJsonStringInput(TRANSLATION_REQUEST_PANGEANIC_MULTIPLE_LANG);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"source": "de",
"target": "en",
"service": "ETRANSLATION",
"text": [
"erste Textzeile auf Deutsch test, achte auf caching, no cache 1!",
"einen zweiten Text auf Deutsch, kann gecached werden....",
"einen dritten Text auf Deutsch, wie zum Beispiel diesen",
"und einen vierten Text wie: heutige Nachrichten auf www.heute.at"
],
"caching":false
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ public class TranslationConfig{

@Value("${translation.eTranslation.baseUrl:#{null}}")
private String etranslationBaseUrl;

// Whether eTranslation input should be truncated; defaults to false when the property is
// not configured (a null default cannot be bound to a primitive boolean).
@Value("${translation.eTranslation.truncate:false}")
private boolean etranslationTruncate;

@Value("${translation.dummy.services:false}")
private boolean useDummyServices;
Expand Down Expand Up @@ -181,5 +184,9 @@ public int getEtranslationMaxWaitMillisec() {
/**
 * @return the value injected from the {@code translation.eTranslation.baseUrl} property
 */
public String getEtranslationBaseUrl() {
  return this.etranslationBaseUrl;
}

/**
 * @return the value injected from the {@code translation.eTranslation.truncate} property
 */
public boolean getEtranslationTruncate() {
  return this.etranslationTruncate;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ public ETranslationCallbackController(RedisTemplate<String, CachedTranslation> r
@PostMapping(value = ETranslationTranslationService.eTranslationCallbackRelativeUrl)
public void eTranslationCallbackPost(
@RequestParam(value = "target-language", required = false) String targetLanguage,
@RequestParam(value = "translated-text", required = true) String translatedTextSnippet,
@RequestParam(value = "translated-text", required = false) String translatedTextSnippet,
@RequestParam(value = "request-id", required = true) String requestId,
@RequestParam(value = "external-reference", required = true) String externalReference) {
@RequestParam(value = "external-reference", required = true) String externalReference,
@RequestBody(required = false) String body) {

if (LOGGER.isDebugEnabled()) {
LOGGER.debug(
Expand All @@ -44,8 +45,14 @@ public void eTranslationCallbackPost(
LoggingUtils.sanitizeUserInput(requestId),
LoggingUtils.sanitizeUserInput(externalReference));
}
if (externalReference != null && translatedTextSnippet != null) {
redisTemplate.convertAndSend(externalReference, translatedTextSnippet);
/*
* in case we send a document for the translation, we get the output in the body, or otherwise,
* if we send a text snippet in the text-to-translate field, we get the output in the translated-text parameter
* (although also extracted from the body)
*/
String translations = translatedTextSnippet!=null ? translatedTextSnippet : body;
if(externalReference!=null && translations!=null) {
redisTemplate.convertAndSend(externalReference, translations);
}
}

Expand All @@ -56,8 +63,7 @@ public ResponseEntity<String> eTranslationCallbackGet(
@RequestParam(value = "translated-text", required = false) String translatedTextSnippet,
@RequestParam(value = "request-id", required = false) String requestId,
@RequestParam(value = "external-reference", required = false) String externalReference,
@RequestParam(value = "timeout", required = false) Integer timeout,
@RequestBody(required = false) String body) throws InterruptedException {
@RequestParam(value = "timeout", required = false) Integer timeout) throws InterruptedException {

if (timeout != null && timeout > 0) {
// for simulation purposes, wait for $timeout seconds
Expand All @@ -78,7 +84,6 @@ public ResponseEntity<String> eTranslationCallbackGet(

return ResponseEntity.status(HttpStatus.ACCEPTED).build();


}

@Tag(description = "ETranslation error callback endpoint", name = "eTranslationErrorCallback")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.PreDestroy;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import eu.europeana.api.commons.error.EuropeanaI18nApiException;
import eu.europeana.api.translation.config.TranslationConfig;
import eu.europeana.api.translation.config.TranslationServiceProvider;
import eu.europeana.api.translation.config.services.TranslationLangPairCfg;
import eu.europeana.api.translation.definitions.language.LanguagePair;
Expand All @@ -19,11 +20,15 @@
import eu.europeana.api.translation.definitions.model.TranslationRequest;
import eu.europeana.api.translation.definitions.model.TranslationResponse;
import eu.europeana.api.translation.service.TranslationService;
import eu.europeana.api.translation.service.etranslation.ETranslationTranslationService;
import eu.europeana.api.translation.service.exception.TranslationException;
import eu.europeana.api.translation.service.util.TranslationUtils;
import eu.europeana.api.translation.web.exception.ParamValidationException;

@Service
public class TranslationWebService extends BaseWebService {

@Autowired protected TranslationConfig translationConfig;

@Autowired
private final TranslationServiceProvider translationServiceProvider;
Expand Down Expand Up @@ -130,17 +135,98 @@ CachedTranslationService instantiateCachedTranslationService(boolean useCaching,
}
}

/**
 * Fills {@code translObjs} with the leading texts of the request, limited so that the texts
 * joined with {@link ETranslationTranslationService#markupDelimiter} do not exceed
 * {@link ETranslationTranslationService#eTranslationTextSnippetLimit} characters.
 * The first text that does not fit is truncated to the remaining size and all following
 * texts are dropped.
 *
 * <p>This method is used only for the purpose of the eTranslation stress test and can be
 * removed afterwards.
 *
 * @param translationRequest the request providing the texts and the source/target languages
 * @param translObjs the list to which the (possibly truncated) translation objects are added
 */
private void limitTextSizeForETranslationStressTest(TranslationRequest translationRequest, List<TranslationObj> translObjs) {
  final int limit = ETranslationTranslationService.eTranslationTextSnippetLimit;
  final int delimiterLength = ETranslationTranslationService.markupDelimiter.length();

  // number of characters the joined text would have so far (texts plus delimiters)
  int usedChars = 0;
  for (String inputText : translationRequest.getText()) {
    // as long as nothing has been consumed, no delimiter has to be accounted for
    boolean nothingUsedYet = usedChars == 0;
    int available = nothingUsedYet ? limit : limit - usedChars - delimiterLength;
    if (!nothingUsedYet && available <= 0) {
      // no room left for the delimiter plus at least one character
      break;
    }

    boolean fits = inputText.length() <= available;
    String acceptedText = fits ? inputText : inputText.substring(0, available);
    usedChars += (nothingUsedYet ? 0 : delimiterLength) + acceptedText.length();
    translObjs.add(newUntranslatedObj(translationRequest, acceptedText));

    if (!fits) {
      // the limit has been reached by the truncated text, the remaining texts are skipped
      break;
    }
  }
}

/** Creates a not-yet-translated object for the given text using the request's languages. */
private static TranslationObj newUntranslatedObj(TranslationRequest translationRequest, String text) {
  TranslationObj newTranslObj = new TranslationObj();
  newTranslObj.setSourceLang(translationRequest.getSource());
  newTranslObj.setTargetLang(translationRequest.getTarget());
  newTranslObj.setText(text);
  newTranslObj.setTranslated(false); // not translated yet hence set to false
  return newTranslObj;
}

private List<TranslationObj> buildTranslationObjectList(TranslationRequest translationRequest) {
// create a list of objects to be translated
List<TranslationObj> translObjs = new ArrayList<TranslationObj>(translationRequest.getText().size());
for (String inputText : translationRequest.getText()) {
TranslationObj newTranslObj = new TranslationObj();
newTranslObj.setSourceLang(translationRequest.getSource());
newTranslObj.setTargetLang(translationRequest.getTarget());
newTranslObj.setText(inputText);
newTranslObj.setTranslated(false); // not translated yet hence set to false
translObjs.add(newTranslObj);
if(translationConfig.getEtranslationTruncate()
&& ETranslationTranslationService.serviceIdDefault.equals(translationRequest.getService())
) {
limitTextSizeForETranslationStressTest(translationRequest, translObjs);
}
else {
//when we do not need the above method limitTextSizeForETranslationStressTest, leave just this for loop (as was before)
for (String inputText : translationRequest.getText()) {
TranslationObj newTranslObj = new TranslationObj();
newTranslObj.setSourceLang(translationRequest.getSource());
newTranslObj.setTargetLang(translationRequest.getTarget());
newTranslObj.setText(inputText);
newTranslObj.setTranslated(false); // not translated yet hence set to false
translObjs.add(newTranslObj);
}
}

return translObjs;
}

Expand Down

0 comments on commit 2551e1c

Please sign in to comment.