Skip to content

Commit

Permalink
MET-5754 Introduce date range fields for solr
Browse files Browse the repository at this point in the history
  • Loading branch information
stzanakis committed Jul 18, 2024
1 parent 908433b commit 9aada9a
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ public void publishMongo(RdfWrapper rdf, Date recordDate) throws IndexingExcepti
/**
* Publishes an RDF to solr server
*
* @param rdf RDF to publish.
* @param rdfWrapper RDF to publish.
* @param recordDate The date that would represent the created/updated date of a record
* @throws IndexingException which can be one of:
* <ul>
Expand All @@ -221,14 +221,15 @@ public void publishMongo(RdfWrapper rdf, Date recordDate) throws IndexingExcepti
* contents</li>
* </ul>
*/
public void publishSolr(RdfWrapper rdf, Date recordDate) throws IndexingException {
final FullBeanImpl fullBean = convertRDFToFullBean(rdf);
public void publishSolr(RdfWrapper rdfWrapper, Date recordDate) throws IndexingException {
final FullBeanImpl fullBean = convertRDFToFullBean(rdfWrapper);
if (!preserveUpdateAndCreateTimesFromRdf) {
Date createdDate;
if (rdf.getAbout() == null) {
if (rdfWrapper.getAbout() == null) {
createdDate = recordDate;
} else {
final String solrQuery = String.format("%s:\"%s\"", EdmLabel.EUROPEANA_ID, ClientUtils.escapeQueryChars(rdf.getAbout()));
final String solrQuery = String.format("%s:\"%s\"", EdmLabel.EUROPEANA_ID,
ClientUtils.escapeQueryChars(rdfWrapper.getAbout()));
final Map<String, String> queryParamMap = new HashMap<>();
queryParamMap.put("q", solrQuery);
queryParamMap.put("fl", EdmLabel.TIMESTAMP_CREATED + "," + EdmLabel.EUROPEANA_ID);
Expand All @@ -240,7 +241,7 @@ public void publishSolr(RdfWrapper rdf, Date recordDate) throws IndexingExceptio
}
setUpdateAndCreateTime(null, fullBean, Pair.of(recordDate, createdDate));
}
publishToSolrFinal(rdf, fullBean);
publishToSolrFinal(rdfWrapper, fullBean);
}

private SolrDocumentList getExistingDocuments(Map<String, String> queryParamMap)
Expand Down Expand Up @@ -318,13 +319,14 @@ private FullBeanImpl convertRDFToFullBean(RdfWrapper rdf) {
return fullBeanConverter.convertRdfToFullBean(rdf);
}

private void publishToSolr(RdfWrapper rdf, FullBeanImpl fullBean) throws IndexingException {
private void publishToSolr(RdfWrapper rdfWrapper, FullBeanImpl fullBean) throws IndexingException {

// Create Solr document.
final SolrDocumentPopulator documentPopulator = new SolrDocumentPopulator();
final SolrInputDocument document = new SolrInputDocument();
documentPopulator.populateWithProperties(document, fullBean);
documentPopulator.populateWithFacets(document, rdf);
documentPopulator.populateWithFacets(document, rdfWrapper);
documentPopulator.populateWithDateRanges(document, rdfWrapper);

// Save Solr document.
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@ public enum EdmLabel {
EUROPEANA_COLLECTIONNAME("europeana_collectionName"),
EUROPEANA_ID("europeana_id"),
TIMESTAMP_CREATED("timestamp_created"),
TIMESTAMP_UPDATED("timestamp_update"),

TIMESTAMP_UPDATED("timestamp_update"),

//DATE RANGES
CREATED_DATE("created_date"),
ISSUED_DATE("issued_date"),
CREATED_DATE_BEGIN("created_date_begin"),
CREATED_DATE_END("created_date_end"),
ISSUED_DATE_BEGIN("issued_date_begin"),
ISSUED_DATE_END("issued_date_end"),

// CRF Fields
FACET_HAS_THUMBNAILS("has_thumbnails"),
FACET_HAS_LANDING_PAGE("has_landingpage"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
package eu.europeana.indexing.solr;

import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE;
import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE_BEGIN;
import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE_END;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE_BEGIN;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE_END;
import static java.util.Optional.empty;
import static java.util.Optional.of;
import static java.util.Optional.ofNullable;
import static java.util.function.Predicate.not;
import static org.apache.commons.lang3.BooleanUtils.isTrue;

import eu.europeana.corelib.definitions.edm.entity.QualityAnnotation;
import eu.europeana.corelib.solr.bean.impl.FullBeanImpl;
Expand All @@ -24,7 +34,15 @@
import eu.europeana.indexing.utils.RdfWrapper;
import eu.europeana.indexing.utils.WebResourceLinkType;
import eu.europeana.indexing.utils.WebResourceWrapper;
import eu.europeana.metis.schema.jibx.Begin;
import eu.europeana.metis.schema.jibx.End;
import eu.europeana.metis.schema.jibx.EuropeanaType.Choice;
import eu.europeana.metis.schema.jibx.ProxyType;
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType;
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType.Resource;
import eu.europeana.metis.schema.jibx.TimeSpanType;
import eu.europeana.metis.schema.model.MediaType;
import java.time.LocalDate;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
Expand All @@ -33,6 +51,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -62,23 +81,29 @@ public void populateWithProperties(SolrInputDocument document, FullBeanImpl full
new FullBeanSolrProperties().setProperties(document, fullBean);

// Gather the licenses.
final List<LicenseImpl> licenses = Optional.ofNullable(fullBean.getLicenses()).stream()
.flatMap(List::stream).filter(Objects::nonNull).collect(Collectors.toList());
final List<LicenseImpl> licenses = ofNullable(fullBean.getLicenses()).stream()
.flatMap(List::stream).filter(Objects::nonNull)
.collect(Collectors.toList());

// Gather the quality annotations.
final Set<String> acceptableTargets = Optional.ofNullable(fullBean.getAggregations()).stream()
.flatMap(Collection::stream).filter(Objects::nonNull)
.map(AggregationImpl::getAbout)
.filter(Objects::nonNull).collect(Collectors.toSet());
final Predicate<QualityAnnotation> hasAcceptableTarget = annotation -> Optional
.ofNullable(annotation.getTarget()).stream().flatMap(Arrays::stream)
.anyMatch(acceptableTargets::contains);
final List<QualityAnnotation> annotationsToAdd = Optional
.ofNullable(fullBean.getQualityAnnotations()).map(List::stream).orElseGet(Stream::empty)
.filter(Objects::nonNull)
.filter(annotation -> StringUtils.isNotBlank(annotation.getBody()))
.filter(hasAcceptableTarget)
.collect(Collectors.toList());
final Set<String> acceptableTargets = ofNullable(fullBean.getAggregations()).stream()
.flatMap(Collection::stream)
.filter(Objects::nonNull)
.map(AggregationImpl::getAbout)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
final Predicate<QualityAnnotation> hasAcceptableTarget = annotation -> ofNullable(annotation.getTarget()).stream().flatMap(
Arrays::stream)
.anyMatch(
acceptableTargets::contains);
final List<QualityAnnotation> annotationsToAdd = ofNullable(fullBean.getQualityAnnotations()).map(List::stream)
.orElseGet(Stream::empty)
.filter(Objects::nonNull)
.filter(
annotation -> StringUtils.isNotBlank(
annotation.getBody()))
.filter(hasAcceptableTarget)
.collect(Collectors.toList());
new QualityAnnotationSolrCreator().addAllToDocument(document, annotationsToAdd);

// Add the containing objects.
Expand Down Expand Up @@ -151,6 +176,61 @@ public void populateWithFacets(SolrInputDocument document, RdfWrapper rdf) {
}
}

/**
 * Adds the date range fields to the given Solr document: first the fields derived from the 'created' choices, then the
 * fields derived from the 'issued' choices. Please note: this method should only be called once on a given document,
 * otherwise the behavior is not defined.
 *
 * @param document The document to populate.
 * @param rdfWrapper The RDF to populate from.
 */
public void populateWithDateRanges(SolrInputDocument document, RdfWrapper rdfWrapper) {
  // Date range fields for the 'created' property.
  populateWithDateRanges(document, rdfWrapper,
      Choice::ifCreated, Choice::getCreated,
      CREATED_DATE, CREATED_DATE_BEGIN, CREATED_DATE_END);

  // Date range fields for the 'issued' property.
  populateWithDateRanges(document, rdfWrapper,
      Choice::ifIssued, Choice::getIssued,
      ISSUED_DATE, ISSUED_DATE_BEGIN, ISSUED_DATE_END);
}

/**
 * Populates the Solr document with the date range fields for one kind of proxy property.
 * <p>
 * The choices of the Europeana proxy that match the given predicate are collected and their resource links are matched
 * against the 'about' value of the time spans that carry a notation. For every matching time span that has a begin and/or
 * an end, one range value is added to the range field. The earliest begin and the latest end over all matching time spans
 * are added to the begin and end fields respectively (only if at least one range was added).
 * </p>
 *
 * @param document The document to populate.
 * @param rdfWrapper The RDF to populate from.
 * @param choiceTypePredicate Selects the proxy choices of the wanted property type.
 * @param choiceValueGetter Extracts the property value from a selected choice.
 * @param edmLabelDate The field receiving one range value per matching time span.
 * @param edmLabelDateBegin The field receiving the earliest begin date.
 * @param edmLabelDateEnd The field receiving the latest end date.
 * @throws java.util.NoSuchElementException if the RDF does not contain a Europeana proxy.
 * @throws java.time.format.DateTimeParseException if a begin or end value is not an ISO local date (yyyy-MM-dd).
 */
private void populateWithDateRanges(SolrInputDocument document, RdfWrapper rdfWrapper, Predicate<Choice> choiceTypePredicate,
    Function<Choice, ResourceOrLiteralType> choiceValueGetter, EdmLabel edmLabelDate, EdmLabel edmLabelDateBegin,
    EdmLabel edmLabelDateEnd) {
  // Only time spans that carry a notation are considered.
  final List<TimeSpanType> normalizedTimeSpans = rdfWrapper.getTimeSpans().stream()
      .filter(timeSpanType -> timeSpanType.getNotation() != null)
      .toList();

  final ProxyType europeanaProxy = rdfWrapper.getProxies().stream()
      .filter(RdfWrapper::isEuropeanaProxy)
      .findFirst()
      .orElseThrow();

  // Collect the links of the selected choices. Literal-only values have no resource, so nulls are filtered out at every
  // step instead of being dereferenced.
  final Set<String> proxyChoiceLinks = europeanaProxy.getChoiceList().stream()
      .filter(choiceTypePredicate)
      .map(choiceValueGetter)
      .filter(Objects::nonNull)
      .map(ResourceOrLiteralType::getResource)
      .filter(Objects::nonNull)
      .map(Resource::getResource)
      .filter(Objects::nonNull)
      .collect(Collectors.toSet());

  final List<TimeSpanType> proxyChoiceMatchingTimeSpans = normalizedTimeSpans.stream()
      .filter(timeSpanType -> proxyChoiceLinks.contains(timeSpanType.getAbout()))
      .toList();

  Optional<LocalDate> earliestBegin = empty();
  Optional<LocalDate> latestEnd = empty();
  for (TimeSpanType timeSpanType : proxyChoiceMatchingTimeSpans) {
    final String begin = ofNullable(timeSpanType.getBegin()).map(Begin::getString).orElse(null);
    final String end = ofNullable(timeSpanType.getEnd()).map(End::getString).orElse(null);
    // If either 'begin' or 'end' is null, set it to the value of the other
    final String finalBegin = ofNullable(begin).orElse(end);
    final String finalEnd = ofNullable(end).orElse(begin);
    // We only need to check if finalBegin is non-null since if finalBegin is non-null then finalEnd will certainly be
    // non-null
    if (finalBegin != null) {
      // Solr range syntax requires whitespace around the TO keyword: [begin TO end]
      document.addField(edmLabelDate.toString(), String.format("[%s TO %s]", finalBegin, finalEnd));

      final LocalDate localDateFinalBegin = LocalDate.parse(finalBegin);
      final LocalDate localDateFinalEnd = LocalDate.parse(finalEnd);

      earliestBegin = earliestBegin
          .map(earliest -> localDateFinalBegin.isBefore(earliest) ? localDateFinalBegin : earliest)
          .or(() -> of(localDateFinalBegin));
      latestEnd = latestEnd
          .map(latest -> localDateFinalEnd.isAfter(latest) ? localDateFinalEnd : latest)
          .or(() -> of(localDateFinalEnd));
    }
  }
  earliestBegin.ifPresent(date -> document.addField(edmLabelDateBegin.toString(), date.toString()));
  latestEnd.ifPresent(date -> document.addField(edmLabelDateEnd.toString(), date.toString()));
}

private List<AggregationImpl> getDataProviderAggregations(FullBeanImpl fullBean) {
List<String> proxyInResult = fullBean.getProxies().stream()
.filter(not(ProxyImpl::isEuropeanaProxy))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public RdfWrapper(RDF rdfRecord) {
this.rdfRecord = rdfRecord;
}

private static boolean isEuropeanaProxy(ProxyType proxy) {
/**
 * Determines whether the given proxy is flagged as a Europeana proxy.
 *
 * @param proxy The proxy to check. Must not be null (a null value causes a {@link NullPointerException}).
 * @return The value of the proxy's Europeana proxy flag, or false if the proxy has no such flag set.
 */
public static boolean isEuropeanaProxy(ProxyType proxy) {
  // An absent (null) flag element yields an empty optional, which falls through to the default below.
  final Optional<EuropeanaProxy> europeanaProxyFlag = Optional.of(proxy).map(ProxyType::getEuropeanaProxy);
  return europeanaProxyFlag.map(EuropeanaProxy::isEuropeanaProxy).orElse(Boolean.FALSE);
}
Expand Down

0 comments on commit 9aada9a

Please sign in to comment.