Skip to content

Commit

Permalink
Merge pull request #380 from europeana/EA-3748-debias-improve-duplicate-check
Browse files Browse the repository at this point in the history

improve duplicate check for different targets
  • Loading branch information
gsergiu authored Jun 11, 2024
2 parents dd53491 + 81aa24d commit 86d9e2e
Show file tree
Hide file tree
Showing 13 changed files with 371 additions and 258 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_test_analyse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
key: ${{ runner.os }}-sonar
restore-keys: ${{ runner.os }}-sonar
- name: Build, run tests and analyse
run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_annotation
run: mvn -B clean package verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_annotation
env:
# Needed to get some information about the pull request, if any
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,21 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.ws.rs.core.UriBuilder;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.httpclient.HttpURL;
import org.apache.commons.lang3.StringUtils;
import org.apache.stanbol.commons.jsonld.JsonSerializer;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.PropertySource;
import org.springframework.core.io.ClassPathResource;
import org.springframework.stereotype.Service;

import eu.europeana.annotation.config.AnnotationConfiguration;
import eu.europeana.annotation.definitions.exception.AnnotationDereferenciationException;
import eu.europeana.annotation.definitions.exception.UpstreamServerErrorRuntimeException;
Expand Down Expand Up @@ -101,8 +98,9 @@ public Map<String, String> dereferenceOne(String uri, String language) {
InputStream streamResponse=null;

try {
UriBuilder uriBuilder = UriBuilder.fromPath(baseUrl).queryParam(PARAM_URI, uri);
streamResponse = httpConnection.getURLContentAsStream(uriBuilder.build().toString());
HttpURL metisRequestUrl = new HttpURL(baseUrl);
metisRequestUrl.setQuery(PARAM_URI, uri);
streamResponse = httpConnection.getURLContentAsStream(metisRequestUrl.toString());
if(streamResponse==null) {
throw new UpstreamServerErrorRuntimeException("MetisDereferenciationClient invalid status code or response not available.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import eu.europeana.annotation.definitions.model.moderation.Summary;
import eu.europeana.annotation.definitions.model.search.Query;
import eu.europeana.annotation.definitions.model.search.result.ResultSet;
import eu.europeana.annotation.definitions.model.target.Target;
import eu.europeana.annotation.definitions.model.utils.AnnotationIdHelper;
import eu.europeana.annotation.definitions.model.view.AnnotationView;
import eu.europeana.annotation.definitions.model.vocabulary.BodyInternalTypes;
Expand Down Expand Up @@ -611,8 +612,24 @@ private SolrQuery solrUniquenessQueryCaptionsAndSubtitles(Annotation anno, boole

private SolrQuery solrUniquenessQueryDebias(Annotation anno, boolean noSelfDupplicate) {
SolrQuery query = new SolrQuery();

query.setQuery(WebAnnotationModelFields.MOTIVATION + ":\"" + MotivationTypes.HIGHLIGHTING.getOaType() + "\"");
query.addFilterQuery(SolrAnnotationConstants.TARGET_URI + ":\"" + anno.getTarget().get(0).getSource() + "\"");

StringBuilder targetOrQuery=new StringBuilder();
//all validated annotations have at least one target
for(Target t : anno.getTarget()) {
if(targetOrQuery.isEmpty()) {
//for first entry append the bracket
targetOrQuery.append("(\"" + anno.getTarget().get(0).getSource() + "\"");
}else {
//for the rest of the entries append the OR operator
targetOrQuery.append(" OR \"" + t.getSource() + "\"");
}
}
//close bracket in the end
targetOrQuery.append(")");
query.addFilterQuery(SolrAnnotationConstants.TARGET_URI + ":" + targetOrQuery.toString());

List<String> bodyUris = extractUriValues(anno.getBody());
for (int i=0; i<bodyUris.size(); i++) {
query.addFilterQuery(SolrAnnotationConstants.BODY_URI + ":\"" + bodyUris.get(i) + "\"");
Expand Down
Loading

0 comments on commit 86d9e2e

Please sign in to comment.