Skip to content

Commit

Permalink
improve pangeanic translation service
Browse files Browse the repository at this point in the history
  • Loading branch information
StevaneticS authored and StevaneticS committed Nov 21, 2023
1 parent f4f2df8 commit 3d448ca
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public interface TranslationService {
*/
void translate(List<TranslationObj> translationObjs, boolean detectLanguages) throws TranslationException;

void detectLanguages(List<TranslationObj> translationObjs, List<Integer> validIndexes) throws TranslationException;
void detectLanguages(List<TranslationObj> translationObjs) throws TranslationException;

@Deprecated
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ public void close() {
}

@Override
public void detectLanguages(List<TranslationObj> translationObjs, List<Integer> validIndexes)
public void detectLanguages(List<TranslationObj> translationObjs)
throws TranslationException {
// No-op: the body is empty, so this implementation performs no language detection
// and leaves translationObjs untouched.
// NOTE(review): two signature lines appear above because this text is a rendered diff hunk;
// presumably the single-argument form is the post-change line - confirm against the repository.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,31 +104,12 @@ public void translate(List<TranslationObj> translationObjs, boolean detectLangua
try {
if(translationObjs.isEmpty()) return;

/*here we analyze the general case where some texts may have the source lang, some may not, and
* the source languages may be different, but only for the texts that already do not have the translations
* (maybe fetched from the cache or computed with another service)
*/
if(detectLanguages) {
List<Integer> validIndexesWithoutSourceLang = IntStream.range(0, translationObjs.size())
.filter(i -> translationObjs.get(i).getTranslation()==null && translationObjs.get(i).getSourceLang()==null)
.boxed()
.collect(Collectors.toList());
if(!validIndexesWithoutSourceLang.isEmpty()) {
detectLanguages(translationObjs, validIndexesWithoutSourceLang);
}
detectLanguages(translationObjs);
}

List<Integer> validIndexes = IntStream.range(0, translationObjs.size())
.filter(i -> translationObjs.get(i).getTranslation()==null)
.boxed()
.collect(Collectors.toList());
List<String> sourceLangs = new ArrayList<String>();
for(Integer validIndexWithSourceLangElem : validIndexes) {
sourceLangs.add(translationObjs.get(validIndexWithSourceLangElem).getSourceLang());
}
if(!validIndexes.isEmpty()) {
computeTranslations(translationObjs, sourceLangs, validIndexes);
}
computeTranslations(translationObjs);

}
catch (JSONException e) {
throw new TranslationException("Exception occured during Pangeanic translation!",
Expand All @@ -146,61 +127,74 @@ public List<String> translate(List<String> texts, String targetLanguage)
return translate(texts, targetLanguage, null);
}

/*
 * Sends translation requests for the entries of translationObjs, batching texts that share
 * the same source language into a single request.
 *
 * For each batch a request is built with PangeanicTranslationUtils.createTranslateRequest
 * (endpoint, texts, target language, source language, "") and handed to
 * sendTranslateRequestAndParse together with the positions of the batched entries.
 *
 * Declared to throw JSONException and TranslationException.
 *
 * NOTE(review): this text is a rendered diff hunk, so two signatures and two variants of the
 * loop are interleaved below. Presumably the three-argument form (detectedLanguages /
 * validIndexes) is the pre-change code and the single-argument form is the post-change code -
 * confirm against the repository before relying on either.
 */
private void computeTranslations(List<TranslationObj> translationObjs, List<String> detectedLanguages, List<Integer> validIndexes) throws JSONException, TranslationException {
if (LOG.isDebugEnabled()) {
LOG.debug(
"Pangeanic detect lang request with hint null is executed. Detected languages are {} ",
LoggingUtils.sanitizeUserInput(detectedLanguages.toString()));
}

private void computeTranslations(List<TranslationObj> translationObjs) throws JSONException, TranslationException {
// Source languages for which a batch has already been collected.
List<String> analyzedLangs = new ArrayList<String>();
for(int i=0;i<detectedLanguages.size();i++) {
//take the same lang values and send a translation request with a list of texts belonging to that same lang
String sourceLang = detectedLanguages.get(i);
if(!analyzedLangs.contains(sourceLang)) {
for(int i=0;i<translationObjs.size();i++) {
// Single-argument variant: only entries without a translation are processed.
if(translationObjs.get(i).getTranslation()==null) {
//take the same lang values and send a translation request with a list of texts belonging to that same lang
String sourceLang = translationObjs.get(i).getSourceLang();
// Positions (translIndexes) and texts (translTexts) of the current batch; the current entry is always its first member.
List<Integer> translIndexes = new ArrayList<Integer>();
translIndexes.add(validIndexes.get(i));
translIndexes.add(i);
List<String> translTexts = new ArrayList<String>();
translTexts.add(translationObjs.get(validIndexes.get(i)).getText());
String targetLang = translationObjs.get(validIndexes.get(i)).getTargetLang();
if(sourceLang!=null) {
for(int j=i+1;j<detectedLanguages.size();j++) {
if(detectedLanguages.get(j)!=null && sourceLang.equals(detectedLanguages.get(j))) {
translIndexes.add(validIndexes.get(j));
translTexts.add(translationObjs.get(validIndexes.get(j)).getText());
translTexts.add(translationObjs.get(i).getText());
// The target language of the whole batch is read from its first entry only.
// NOTE(review): later entries join the batch on source language alone - confirm that
// all entries passed in are expected to share one target language.
String targetLang = translationObjs.get(i).getTargetLang();

// Entries with a null source language are never grouped: the scan below is skipped for them.
if(sourceLang!=null && !analyzedLangs.contains(sourceLang)) {
// Pull in every later entry that still lacks a translation and has the same source language.
for(int j=i+1;j<translationObjs.size();j++) {
if(translationObjs.get(j).getTranslation()==null) {
String nextSourceLang = translationObjs.get(j).getSourceLang();
if(sourceLang.equals(nextSourceLang)) {
translIndexes.add(j);
translTexts.add(translationObjs.get(j).getText());
}
}
}
analyzedLangs.add(sourceLang);
}

//send the request
HttpPost translateRequest = PangeanicTranslationUtils.createTranslateRequest(
getExternalServiceEndPoint(), translTexts, targetLang, sourceLang, "");
// NOTE(review): presumably this call stores the translations on the entries listed in
// translIndexes, which is what would keep the outer loop from re-sending them - confirm
// in sendTranslateRequestAndParse.
sendTranslateRequestAndParse(translateRequest, translationObjs, translIndexes, sourceLang);

}
}
}
}

/*
 * Detects the source language of entries in translationObjs and stores it on them via
 * setSourceLang.
 *
 * In the single-argument variant only entries that have neither a source language nor a
 * translation are submitted; when there are none the method returns before the detection
 * service is checked.
 *
 * Throws TranslationException with SC_INTERNAL_SERVER_ERROR when langDetectService is null,
 * and wraps a LanguageDetectionException from the service into a TranslationException that
 * carries the exception's remote status code.
 *
 * NOTE(review): this text is a rendered diff hunk, so two signatures and two variants of
 * some statements are interleaved below. Presumably the validIndexes form is the pre-change
 * code - confirm against the repository.
 */
@Override
public void detectLanguages(List<TranslationObj> translationObjs, List<Integer> validIndexes) throws TranslationException {
public void detectLanguages(List<TranslationObj> translationObjs) throws TranslationException {
// Positions of the entries that have no source language and no translation yet.
List<Integer> indexesWithoutSourceAndTranslation = IntStream.range(0, translationObjs.size())
.filter(i -> translationObjs.get(i).getSourceLang()==null && translationObjs.get(i).getTranslation()==null)
.boxed()
.collect(Collectors.toList());
// Nothing to detect: return without requiring a configured detection service.
if(indexesWithoutSourceAndTranslation.isEmpty()) {
return;
}

if (langDetectService == null) {
throw new TranslationException("No langDetectService configured!",
HttpStatus.SC_INTERNAL_SERVER_ERROR);
}

List<String> detectedLanguages=null;
// Texts to submit, in the same order as the collected positions.
List<String> texts = validIndexes.stream()
List<String> texts = indexesWithoutSourceAndTranslation.stream()
.map(index -> translationObjs.get(index).getText())
.collect(Collectors.toList());
List<String> detectedLanguages=null;
try {
// The second argument is null; the debug message below refers to it as the hint.
detectedLanguages = langDetectService.detectLang(texts, null);
} catch (LanguageDetectionException e) {
throw new TranslationException("Error when tryng to detect the language of the text input!",
e.getRemoteStatusCode(), e);
}

// Copy each detected language onto the entry at the matching position.
// NOTE(review): assumes detectLang returns one result per submitted text, in the same order;
// a longer result list would make the index lookup below throw - confirm the service contract.
for(int i=0;i<detectedLanguages.size();i++) {
translationObjs.get(validIndexes.get(i)).setSourceLang(detectedLanguages.get(i));
if(detectedLanguages!=null) {
for(int i=0;i<detectedLanguages.size();i++) {
translationObjs.get(indexesWithoutSourceAndTranslation.get(i)).setSourceLang(detectedLanguages.get(i));
}
if (LOG.isDebugEnabled()) {
LOG.debug(
"Pangeanic detect lang request with hint null is executed. Detected languages are {} ",
LoggingUtils.sanitizeUserInput(detectedLanguages.toString()));
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import eu.europeana.api.translation.definitions.model.TranslationObj;
import eu.europeana.api.translation.service.AbstractTranslationService;
import eu.europeana.api.translation.service.TranslationService;
Expand Down Expand Up @@ -49,18 +48,11 @@ public List<String> translate(List<String> texts, String targetLanguage) throws
@Override
public void translate(List<TranslationObj> translationObjs, boolean detectLanguages) throws TranslationException {
//first detect languages for the texts that do not have it using the pangeanic lang detect
List<Integer> indexesWithoutSourceLang = IntStream.range(0, translationObjs.size())
.filter(i -> translationObjs.get(i).getSourceLang()==null)
.boxed()
.collect(Collectors.toList());
if(!indexesWithoutSourceLang.isEmpty()) {
translationServicePangeanic.detectLanguages(translationObjs, indexesWithoutSourceLang);
}

translationServicePangeanic.detectLanguages(translationObjs);
//then check if the translations exist in cache
redisCacheService.getCachedTranslations(translationObjs);
boolean anyCachedTransl = translationObjs.stream().filter(el -> el.getIsCached()).collect(Collectors.toList()).size()>0;
//if there is any translation in the cache set the serviceId to null, because we do not know which service translated
//if there is any translation in the cache set the serviceId to null, because we do not know which service translated that
if(anyCachedTransl) {
setServiceId(null);
}
Expand All @@ -85,7 +77,7 @@ public String getExternalServiceEndPoint() {
}

@Override
public void detectLanguages(List<TranslationObj> translationObjs, List<Integer> validIndexes)
public void detectLanguages(List<TranslationObj> translationObjs)
throws TranslationException {
// No-op: the body is empty, so calling detectLanguages on this class changes nothing.
// NOTE(review): the translate method shown earlier in this hunk calls
// translationServicePangeanic.detectLanguages(...) rather than this method - presumably
// detection is delegated there on purpose; confirm.
}

Expand Down

0 comments on commit 3d448ca

Please sign in to comment.