Skip to content

Commit

Permalink
EA-3619 add workflows for metadata detection and translations
Browse files Browse the repository at this point in the history
  • Loading branch information
SrishtiSingh-eu committed Nov 29, 2023
1 parent 1b044cb commit ffdda50
Show file tree
Hide file tree
Showing 10 changed files with 935 additions and 5 deletions.
5 changes: 2 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
<module>translation-service-pangeanic</module>
<module>translation-web</module>
<module>translation-tests</module>

<module>translation-record</module>

</modules>

<repositories>
Expand Down Expand Up @@ -58,9 +59,7 @@
<git-commit-id.version>5.0.0</git-commit-id.version>
<git-code-format.version>3.1</git-code-format.version>
<surefire.version>3.0.0-M5</surefire.version>
<!--
<corelib.version>2.16.2</corelib.version>
-->
<!-- sonar plugins -->
<jacoco-plugin.version>0.8.7</jacoco-plugin.version>
<pmd-plugin.version>3.15.0</pmd-plugin.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public enum Language {

public static final String DEF = "def";
public static final String NO_LINGUISTIC_CONTENT = "zxx";

// pivot language
public static final String ENGLISH = Language.EN.name().toLowerCase(Locale.ROOT);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@ public class TranslationObj {
private String translation;
private String cacheKey;
private boolean isCached;


/**
 * Creates a translation object with its text, source language and target language set.
 * The remaining fields (translation, cacheKey, isCached) are not touched here and keep
 * their default values until assigned through their setters.
 *
 * @param text       value stored in the {@code text} field
 * @param sourceLang value stored in the {@code sourceLang} field
 * @param targetLang value stored in the {@code targetLang} field
 */
public TranslationObj(String text, String sourceLang, String targetLang) {
this.text = text;
this.sourceLang = sourceLang;
this.targetLang = targetLang;
}

/**
 * @return the current value of the {@code text} field
 */
public String getText() {
return text;
}
Expand Down Expand Up @@ -44,5 +50,5 @@ public boolean getIsCached() {
/**
 * Sets the {@code isCached} flag.
 *
 * @param cached new value of the flag
 */
public void setIsCached(boolean cached) {
this.isCached = cached;
}

}
42 changes: 42 additions & 0 deletions translation-record/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the translation-record module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Parent project; this module declares no groupId/version of its own, so both are inherited from here. -->
<parent>
<artifactId>translation-api</artifactId>
<groupId>eu.europeana.api</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>translation-record</artifactId>

<properties>
<!-- Compile for Java 17 (source and target level). -->
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>

<dependencies>
<!-- Sibling modules of the same project; versions are pinned explicitly and must be kept in sync with the parent version above. -->
<dependency>
<groupId>eu.europeana.api</groupId>
<artifactId>translation-definitions</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.europeana.api</groupId>
<artifactId>translation-service-common</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<!-- No version given here: presumably managed by the parent (dependencyManagement or a BOM) - TODO confirm. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>

<!-- Version comes from the corelib.version property, which this file does not define; it has to be supplied by the parent pom. -->
<dependency>
<groupId>eu.europeana.corelib</groupId>
<artifactId>corelib-definitions</artifactId>
<version>${corelib.version}</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package eu.europeana.api.translation.record.exception;

import eu.europeana.api.commons.error.EuropeanaApiException;
import org.springframework.http.HttpStatus;

/**
 * API exception that reports itself with HTTP status 400 (Bad Request).
 */
public class InvalidParamValueException extends EuropeanaApiException {

/**
 * @param msg error message, passed unchanged to the {@link EuropeanaApiException} constructor
 */
public InvalidParamValueException(String msg) {
super(msg);
}

/**
 * @return always {@link HttpStatus#BAD_REQUEST}
 */
@Override
public HttpStatus getResponseStatus() {
return HttpStatus.BAD_REQUEST;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package eu.europeana.api.translation.record.model;

import eu.europeana.api.translation.definitions.model.TranslationObj;
import eu.europeana.api.translation.service.TranslationService;
import eu.europeana.api.translation.service.exception.TranslationException;
import eu.europeana.corelib.utils.ComparatorUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import javax.validation.constraints.NotNull;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Insertion-ordered map of field name to the list of values of that field, where all
 * values share a single source language.
 * <p>
 * {@link #translate(TranslationService, String)} sends every value to a translation
 * service and returns a new map with the same field names holding the translated values.
 */
public class TranslationMap extends LinkedHashMap<String, List<String>> {

    private static final long serialVersionUID = 7857857025275959529L;

    private static final Logger LOG = LogManager.getLogger(TranslationMap.class);

    @NotNull
    private final String sourceLanguage;

    /**
     * Creates an empty map for the given source language.
     *
     * @param sourceLanguage language of all values that will be added to this map
     */
    public TranslationMap(@NotNull String sourceLanguage) {
        this.sourceLanguage = sourceLanguage;
    }

    /**
     * Creates a map for the given source language containing one field.
     *
     * @param sourceLanguage language of the values
     * @param fieldName      name of the field; nothing is added if null
     * @param values         values of the field; nothing is added if null or empty
     */
    public TranslationMap(@NotNull String sourceLanguage, String fieldName, List<String> values) {
        this.sourceLanguage = sourceLanguage;
        add(fieldName, values);
    }

    /**
     * Adds the values to the given field, appending to the values already present for it.
     * A null field name, or null or empty values, are ignored.
     * <p>
     * The values are copied into a list owned by this map, so an unmodifiable argument
     * (e.g. {@code List.of(...)}) does not make a later {@code add} for the same field fail,
     * and the caller's list is never modified.
     *
     * @param fieldName name of the field
     * @param values    values to add for that field
     */
    public void add(String fieldName, List<String> values) {
        if (fieldName == null || values == null || values.isEmpty()) {
            return;
        }
        this.computeIfAbsent(fieldName, key -> new ArrayList<>(values.size())).addAll(values);
    }

    /**
     * Translates all values of this map into the target language.
     * <p>
     * All values are flattened, in map iteration order, into one list of
     * {@link TranslationObj} and passed to the service in a single call. The method assumes
     * the service fills in the translation of each object in place without reordering the
     * list; the translations are then sliced back per field by position.
     *
     * @param translationService service that performs the translation
     * @param targetLanguage     language to translate into
     * @return a new map, with {@code targetLanguage} as its source language, holding the
     *         translated values per field with duplicates removed
     * @throws TranslationException if the translation service fails
     */
    public TranslationMap translate(TranslationService translationService, String targetLanguage) throws TranslationException {
        // number of values per field, in the same order the values are appended to translationObjs
        Map<String, Integer> textsPerField = new LinkedHashMap<>();
        List<TranslationObj> translationObjs = new ArrayList<>();

        for (Map.Entry<String, List<String>> entry : this.entrySet()) {
            textsPerField.put(entry.getKey(), entry.getValue().size());
            for (String value : entry.getValue()) {
                translationObjs.add(new TranslationObj(value, this.sourceLanguage, targetLanguage));
            }
        }

        // send request for translation
        LOG.debug("Sending translate request with target language - {} and source language - {}", targetLanguage, this.sourceLanguage);
        translationService.translate(translationObjs);

        // rebuild the per-field structure from the flat list of translated objects
        TranslationMap translatedMap = new TranslationMap(targetLanguage);
        int fromIndex = 0;
        for (Map.Entry<String, Integer> entry : textsPerField.entrySet()) {
            int toIndex = fromIndex + entry.getValue();
            List<String> translatedValues = new ArrayList<>(entry.getValue());
            for (int i = fromIndex; i < toIndex; i++) {
                translatedValues.add(translationObjs.get(i).getTranslation());
            }
            // remove duplicate translated values if added
            translatedMap.add(entry.getKey(), ComparatorUtils.removeDuplicates(translatedValues));
            // advance the offset cumulatively; assigning only the current field size here
            // would make every field after the second read the wrong slice
            fromIndex = toIndex;
        }
        return translatedMap;
    }

    /**
     * @return the language of the values held in this map
     */
    @NotNull
    public String getSourceLanguage() {
        return sourceLanguage;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package eu.europeana.api.translation.record.service;

import eu.europeana.api.commons.error.EuropeanaApiException;
import eu.europeana.api.translation.record.exception.InvalidParamValueException;
import eu.europeana.api.translation.service.exception.TranslationException;
import eu.europeana.corelib.definitions.edm.beans.FullBean;
import eu.europeana.corelib.definitions.edm.entity.ContextualClass;
import eu.europeana.corelib.definitions.edm.entity.Proxy;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.util.ReflectionUtils;

import java.lang.reflect.Field;
import java.util.*;
import java.util.function.Function;

/**
 * Reflection-based helpers for reading data out of a record ({@link FullBean}) and its
 * proxies: locating the Europeana proxy, reading a language map field of a proxy by name,
 * and finding a contextual entity by its URI.
 */
public class BaseService {

    private static final Logger LOG = LogManager.getLogger(BaseService.class);

    // Ordered list of two-letter language codes. The (misspelled) identifier is kept as is
    // because it is visible to subclasses. Presumably the order in which languages are
    // preferred - TODO confirm against the code that consumes it.
    protected static final List<String> PRECENDANCE_LIST = List.of("sk", "hr", "pl", "ro", "it", "sv", "bg", "fr", "es", "cs", "de", "lv", "el", "fi", "nl", "hu", "da", "sl", "et", "pt", "lt", "ga", "en");

    // names of the proxy fields accepted by proxyFieldFilter
    private static final Set<String> INCLUDE_PROXY_MAP_FIELDS = Set.of("dcContributor", "dcCoverage", "dcCreator", "dcDate", "dcDescription", "dcFormat","dcPublisher",
            "dcRelation", "dcRights", "dcSource", "dcSubject", "dcTitle", "dcType", "dctermsAlternative", "dctermsCreated", "dctermsExtent", "dctermsHasPart", "dctermsHasVersion",
            "dctermsIsFormatOf", "dctermsIsPartOf", "dctermsIsReferencedBy", "dctermsIsReplacedBy", "dctermsIsRequiredBy", "dctermsIssued", "dctermsMedium", "dctermsProvenance",
            "dctermsReferences", "dctermsSpatial", "dctermsTemporal", "edmCurrentLocation", "edmHasMet");

    // names of the bean fields searched by entityExistsWithUrl
    private static final List<String> ENTITIES = List.of("agents", "concepts", "places", "timespans");

    // Accepts whitelisted fields whose declared type can hold a Map reference.
    // NOTE(review): isAssignableFrom is called on the field type, so this matches fields
    // declared as Map (or Object) but not fields declared as a Map subtype such as HashMap.
    // If "field is some kind of Map" is intended, the check should be
    // Map.class.isAssignableFrom(field.getType()) - left unchanged pending confirmation.
    protected static final ReflectionUtils.FieldFilter proxyFieldFilter = field -> field.getType().isAssignableFrom(Map.class) &&
            INCLUDE_PROXY_MAP_FIELDS.contains(field.getName());


    /**
     * Gets the Europeana proxy from the list of proxies.
     * There are records where the first proxy is not the Europeana proxy, so the whole
     * list is searched and the first proxy flagged as Europeana proxy is returned.
     *
     * @param proxies  proxies of the record; a null list or null elements are treated as
     *                 "no Europeana proxy"
     * @param recordId id of the record, used in the error message only
     * @return the first proxy for which {@code isEuropeanaProxy()} is true
     * @throws EuropeanaApiException (an {@link InvalidParamValueException}) if no Europeana
     *                               proxy is present
     */
    public static Proxy getEuropeanaProxy(List<? extends Proxy> proxies, String recordId) throws EuropeanaApiException {
        if (proxies != null) {
            for (Proxy proxy : proxies) {
                if (proxy != null && proxy.isEuropeanaProxy()) {
                    return proxy;
                }
            }
        }
        throw new InvalidParamValueException("Unexpected data - Europeana proxy not present! Record id - " +recordId);
    }

    /**
     * Returns a function that, given a field name, reads the lang-value map of that field
     * from the proxy object via reflection.
     *
     * @param proxy  proxy to read from
     * @param update if true and the field value is null, an empty map is set on the proxy
     *               for that field and returned; if false the proxy is never modified
     * @return function mapping a field name to its lang-value map; yields a new empty map
     *         (not attached to the proxy) when the value is null or not a map. The function
     *         throws {@link IllegalArgumentException} when the proxy class has no field
     *         with the given name.
     */
    public static Function<String, Map<String, List<String>>> getValueOfTheField(Proxy proxy, boolean update) {
        return fieldName -> {
            Field field = ReflectionUtils.findField(proxy.getClass(), fieldName);
            if (field == null) {
                // findField returns null for an unknown name; fail with a clear message
                // instead of a NullPointerException inside makeAccessible
                throw new IllegalArgumentException(
                        "Field " + fieldName + " not found in class " + proxy.getClass().getName());
            }
            ReflectionUtils.makeAccessible(field);
            Object value = ReflectionUtils.getField(field, proxy);
            // If we are updating the proxy value, then for the field we must set an empty map
            // if it doesn't exist already. When we are just fetching the values, we need not
            // alter anything in the proxy object
            if (value == null && update) {
                ReflectionUtils.setField(field, proxy, new LinkedHashMap<>());
                value = ReflectionUtils.getField(field, proxy);
            }
            if (value instanceof Map) {
                // element types cannot be verified at runtime; the whitelisted fields are lang-maps
                @SuppressWarnings("unchecked")
                Map<String, List<String>> langMap = (Map<String, List<String>>) value;
                return langMap;
            }
            if (value != null) { // should not happen as the whitelisted values are all lang-map
                LOG.warn("Unexpected data - field {} did not return a map", fieldName);
            }
            return new LinkedHashMap<>(); // default return an empty map
        };
    }


    /**
     * Finds the contextual entity of the bean whose "about" value matches the uri,
     * ignoring case. Only the entity list fields named in {@code ENTITIES} are searched,
     * and the first match wins.
     *
     * @param bean record
     * @param uri  url to check
     * @return the matching entity, or null if none of the entity lists contains one
     */
    public static ContextualClass entityExistsWithUrl(FullBean bean, String uri) {
        // single-element holder: the field callback is a lambda and cannot assign a local
        List<ContextualClass> matchingEntity = new ArrayList<>(1);

        // check only entity objects
        ReflectionUtils.FieldFilter entityFilter = field -> ENTITIES.contains(field.getName());

        ReflectionUtils.doWithFields(bean.getClass(), field -> {
            // doWithFields cannot be aborted; once a match is found skip the remaining fields
            if (!matchingEntity.isEmpty()) {
                return;
            }
            ReflectionUtils.makeAccessible(field);
            Object fieldValue = ReflectionUtils.getField(field, bean);
            LOG.trace("Searching for entities with type {}...", field.getName());
            // check only if it's a list and is not empty
            if (!(fieldValue instanceof List) || ((List<?>) fieldValue).isEmpty()) {
                return;
            }
            // the fields selected by ENTITIES are expected to hold contextual entities
            @SuppressWarnings("unchecked")
            List<ContextualClass> entities = (List<ContextualClass>) fieldValue;
            for (ContextualClass entity : entities) {
                if (entity != null && StringUtils.equalsIgnoreCase(uri, entity.getAbout())) {
                    LOG.debug(" Found matching entity for {}", entity.getAbout());
                    matchingEntity.add(entity);
                    break;
                }
            }
        }, entityFilter);

        // return Contextual Class if found or else null
        return matchingEntity.isEmpty() ? null : matchingEntity.get(0);
    }
}

Loading

0 comments on commit ffdda50

Please sign in to comment.