Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MET-5754 Introduce date range fields for solr #677

Merged
merged 3 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
Expand Down Expand Up @@ -211,7 +210,7 @@ public void publishMongo(RdfWrapper rdf, Date recordDate) throws IndexingExcepti
/**
* Publishes an RDF to solr server
*
* @param rdf RDF to publish.
* @param rdfWrapper RDF to publish.
* @param recordDate The date that would represent the created/updated date of a record
* @throws IndexingException which can be one of:
* <ul>
Expand All @@ -221,26 +220,26 @@ public void publishMongo(RdfWrapper rdf, Date recordDate) throws IndexingExcepti
* contents</li>
* </ul>
*/
public void publishSolr(RdfWrapper rdf, Date recordDate) throws IndexingException {
final FullBeanImpl fullBean = convertRDFToFullBean(rdf);
public void publishSolr(RdfWrapper rdfWrapper, Date recordDate) throws IndexingException {
final FullBeanImpl fullBean = convertRDFToFullBean(rdfWrapper);
if (!preserveUpdateAndCreateTimesFromRdf) {
Date createdDate;
if (rdf.getAbout() == null) {
if (rdfWrapper.getAbout() == null) {
createdDate = recordDate;
} else {
final String solrQuery = String.format("%s:\"%s\"", EdmLabel.EUROPEANA_ID, ClientUtils.escapeQueryChars(rdf.getAbout()));
final String solrQuery = String.format("%s:\"%s\"", EdmLabel.EUROPEANA_ID,
ClientUtils.escapeQueryChars(rdfWrapper.getAbout()));
final Map<String, String> queryParamMap = new HashMap<>();
queryParamMap.put("q", solrQuery);
queryParamMap.put("fl", EdmLabel.TIMESTAMP_CREATED + "," + EdmLabel.EUROPEANA_ID);
SolrDocumentList solrDocuments = getExistingDocuments(queryParamMap);
createdDate = (Date) solrDocuments.stream()
.map(document -> document.getFieldValue(EdmLabel.TIMESTAMP_CREATED.toString()))
.collect(Collectors.toList())
.stream().findFirst().orElse(recordDate);
.toList().stream().findFirst().orElse(recordDate);
}
setUpdateAndCreateTime(null, fullBean, Pair.of(recordDate, createdDate));
}
publishToSolrFinal(rdf, fullBean);
publishToSolrFinal(rdfWrapper, fullBean);
}

private SolrDocumentList getExistingDocuments(Map<String, String> queryParamMap)
Expand Down Expand Up @@ -318,13 +317,14 @@ private FullBeanImpl convertRDFToFullBean(RdfWrapper rdf) {
return fullBeanConverter.convertRdfToFullBean(rdf);
}

private void publishToSolr(RdfWrapper rdf, FullBeanImpl fullBean) throws IndexingException {
private void publishToSolr(RdfWrapper rdfWrapper, FullBeanImpl fullBean) throws IndexingException {

// Create Solr document.
final SolrDocumentPopulator documentPopulator = new SolrDocumentPopulator();
final SolrInputDocument document = new SolrInputDocument();
documentPopulator.populateWithProperties(document, fullBean);
documentPopulator.populateWithFacets(document, rdf);
documentPopulator.populateWithFacets(document, rdfWrapper);
documentPopulator.populateWithDateRanges(document, rdfWrapper);

// Save Solr document.
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@ public enum EdmLabel {
EUROPEANA_COLLECTIONNAME("europeana_collectionName"),
EUROPEANA_ID("europeana_id"),
TIMESTAMP_CREATED("timestamp_created"),
TIMESTAMP_UPDATED("timestamp_update"),

TIMESTAMP_UPDATED("timestamp_update"),

//DATE RANGES
CREATED_DATE("created_date"),
ISSUED_DATE("issued_date"),
CREATED_DATE_BEGIN("created_date_begin"),
CREATED_DATE_END("created_date_end"),
ISSUED_DATE_BEGIN("issued_date_begin"),
ISSUED_DATE_END("issued_date_end"),

// CRF Fields
FACET_HAS_THUMBNAILS("has_thumbnails"),
FACET_HAS_LANDING_PAGE("has_landingpage"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
package eu.europeana.indexing.solr;

import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE;
import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE_BEGIN;
import static eu.europeana.indexing.solr.EdmLabel.CREATED_DATE_END;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE_BEGIN;
import static eu.europeana.indexing.solr.EdmLabel.ISSUED_DATE_END;
import static java.util.Optional.empty;
import static java.util.Optional.of;
import static java.util.Optional.ofNullable;
import static java.util.function.Predicate.not;

import eu.europeana.corelib.definitions.edm.entity.QualityAnnotation;
Expand All @@ -24,7 +33,15 @@
import eu.europeana.indexing.utils.RdfWrapper;
import eu.europeana.indexing.utils.WebResourceLinkType;
import eu.europeana.indexing.utils.WebResourceWrapper;
import eu.europeana.metis.schema.jibx.Begin;
import eu.europeana.metis.schema.jibx.End;
import eu.europeana.metis.schema.jibx.EuropeanaType.Choice;
import eu.europeana.metis.schema.jibx.ProxyType;
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType;
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType.Resource;
import eu.europeana.metis.schema.jibx.TimeSpanType;
import eu.europeana.metis.schema.model.MediaType;
import java.time.LocalDate;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
Expand All @@ -33,6 +50,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -62,29 +80,35 @@ public void populateWithProperties(SolrInputDocument document, FullBeanImpl full
new FullBeanSolrProperties().setProperties(document, fullBean);

// Gather the licenses.
final List<LicenseImpl> licenses = Optional.ofNullable(fullBean.getLicenses()).stream()
.flatMap(List::stream).filter(Objects::nonNull).collect(Collectors.toList());
final List<LicenseImpl> licenses = ofNullable(fullBean.getLicenses()).stream()
.flatMap(List::stream).filter(Objects::nonNull)
.toList();

// Gather the quality annotations.
final Set<String> acceptableTargets = Optional.ofNullable(fullBean.getAggregations()).stream()
.flatMap(Collection::stream).filter(Objects::nonNull)
.map(AggregationImpl::getAbout)
.filter(Objects::nonNull).collect(Collectors.toSet());
final Predicate<QualityAnnotation> hasAcceptableTarget = annotation -> Optional
.ofNullable(annotation.getTarget()).stream().flatMap(Arrays::stream)
.anyMatch(acceptableTargets::contains);
final List<QualityAnnotation> annotationsToAdd = Optional
.ofNullable(fullBean.getQualityAnnotations()).map(List::stream).orElseGet(Stream::empty)
.filter(Objects::nonNull)
.filter(annotation -> StringUtils.isNotBlank(annotation.getBody()))
.filter(hasAcceptableTarget)
.collect(Collectors.toList());
final Set<String> acceptableTargets = ofNullable(fullBean.getAggregations()).stream()
.flatMap(Collection::stream)
.filter(Objects::nonNull)
.map(AggregationImpl::getAbout)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
final Predicate<QualityAnnotation> hasAcceptableTarget = annotation -> ofNullable(annotation.getTarget()).stream().flatMap(
Arrays::stream)
.anyMatch(
acceptableTargets::contains);
final List<QualityAnnotation> annotationsToAdd = ofNullable(fullBean.getQualityAnnotations()).map(List::stream)
.orElseGet(Stream::empty)
.filter(Objects::nonNull)
.filter(
annotation -> StringUtils.isNotBlank(
annotation.getBody()))
.filter(hasAcceptableTarget)
.collect(Collectors.toList());
new QualityAnnotationSolrCreator().addAllToDocument(document, annotationsToAdd);

// Add the containing objects.
new ProvidedChoSolrCreator().addToDocument(document, fullBean.getProvidedCHOs().get(0));
new ProvidedChoSolrCreator().addToDocument(document, fullBean.getProvidedCHOs().getFirst());
new AggregationSolrCreator(licenses, fullBean.getOrganizations())
.addToDocument(document, getDataProviderAggregations(fullBean).get(0));
.addToDocument(document, getDataProviderAggregations(fullBean).getFirst());
new EuropeanaAggregationSolrCreator(licenses)
.addToDocument(document, fullBean.getEuropeanaAggregation());
new ProxySolrCreator().addAllToDocument(document, fullBean.getProxies());
Expand Down Expand Up @@ -151,13 +175,67 @@ public void populateWithFacets(SolrInputDocument document, RdfWrapper rdf) {
}
}

/**
 * Adds the date range fields to the given Solr document: first the fields derived from the 'created' values,
 * then the fields derived from the 'issued' values.
 *
 * <p>Please note: this method should only be called once on a given document, otherwise the behavior is not
 * defined.</p>
 *
 * @param document The document to populate.
 * @param rdfWrapper The RDF to populate from.
 */
public void populateWithDateRanges(SolrInputDocument document, RdfWrapper rdfWrapper) {
  // Date range fields for the 'created' property.
  populateWithDateRanges(
      document, rdfWrapper,
      Choice::ifCreated, Choice::getCreated,
      CREATED_DATE, CREATED_DATE_BEGIN, CREATED_DATE_END);

  // Date range fields for the 'issued' property.
  populateWithDateRanges(
      document, rdfWrapper,
      Choice::ifIssued, Choice::getIssued,
      ISSUED_DATE, ISSUED_DATE_BEGIN, ISSUED_DATE_END);
}

/**
 * Populates the Solr document with the date range fields for a single property of the Europeana proxy.
 *
 * <p>The method collects the resource links of the selected property from the Europeana proxy, matches them
 * against the 'about' of the time spans that carry a notation, and for every matching time span that has a begin
 * and/or an end value:</p>
 * <ul>
 * <li>adds a range value {@code [begin TO end]} to the {@code edmLabelDate} field (if only one of begin/end is
 * present, that value is used for both bounds);</li>
 * <li>keeps track of the earliest begin and the latest end, which are finally added to the
 * {@code edmLabelDateBegin} and {@code edmLabelDateEnd} fields respectively (only if at least one range was
 * added).</li>
 * </ul>
 * <p>Property values that are literals (i.e. have no resource link) are ignored.</p>
 *
 * @param document The document to populate.
 * @param rdfWrapper The RDF to populate from.
 * @param choiceTypePredicate Selects the proxy choices that hold the property of interest.
 * @param choiceValueGetter Extracts the property value from a selected choice.
 * @param edmLabelDate The field receiving one range value per matching time span.
 * @param edmLabelDateBegin The field receiving the earliest begin date over all matching time spans.
 * @param edmLabelDateEnd The field receiving the latest end date over all matching time spans.
 * @throws java.util.NoSuchElementException if the RDF does not contain a Europeana proxy.
 * @throws java.time.format.DateTimeParseException if a begin/end value cannot be parsed by
 * {@link LocalDate#parse(CharSequence)}.
 */
private void populateWithDateRanges(SolrInputDocument document, RdfWrapper rdfWrapper, Predicate<Choice> choiceTypePredicate,
    Function<Choice, ResourceOrLiteralType> choiceValueGetter, EdmLabel edmLabelDate, EdmLabel edmLabelDateBegin,
    EdmLabel edmLabelDateEnd) {
  // Only time spans with a notation are considered.
  // NOTE(review): presumably the notation marks time spans produced by date normalization - confirm.
  final List<TimeSpanType> normalizedTimeSpans
      = rdfWrapper.getTimeSpans().stream().filter(timeSpanType -> timeSpanType.getNotation() != null).toList();

  final ProxyType europeanaProxy = rdfWrapper.getProxies().stream().filter(RdfWrapper::isEuropeanaProxy).findFirst()
      .orElseThrow();

  // Collect the resource links of the requested property. The choice list may be absent and a value may be a
  // literal without a resource, so every step that can yield null is filtered before it is dereferenced.
  final Set<String> proxyChoiceLinks = ofNullable(europeanaProxy.getChoiceList()).stream()
      .flatMap(List::stream)
      .filter(Objects::nonNull)
      .filter(choiceTypePredicate)
      .map(choiceValueGetter)
      .filter(Objects::nonNull)
      .map(ResourceOrLiteralType::getResource)
      .filter(Objects::nonNull)
      .map(Resource::getResource)
      .filter(Objects::nonNull)
      .collect(Collectors.toSet());

  final List<TimeSpanType> proxyChoiceMatchingTimeSpans = normalizedTimeSpans.stream().filter(
      timeSpanType -> proxyChoiceLinks.contains(timeSpanType.getAbout())).toList();

  Optional<LocalDate> earliestBegin = empty();
  Optional<LocalDate> latestEnd = empty();
  for (TimeSpanType timeSpanType : proxyChoiceMatchingTimeSpans) {
    final String begin = ofNullable(timeSpanType.getBegin()).map(Begin::getString).orElse(null);
    final String end = ofNullable(timeSpanType.getEnd()).map(End::getString).orElse(null);
    // If either 'begin' or 'end' is null, set it to the value of the other
    final String finalBegin = ofNullable(begin).orElse(end);
    final String finalEnd = ofNullable(end).orElse(begin);
    // We only need to check if finalBegin is non-null since if finalBegin is non-null then finalEnd will
    // certainly be non-null
    if (finalBegin != null) {
      // Solr date ranges require the ' TO ' separator, including the surrounding spaces.
      document.addField(edmLabelDate.toString(), String.format("[%s TO %s]", finalBegin, finalEnd));

      final LocalDate localDateFinalBegin = LocalDate.parse(finalBegin);
      final LocalDate localDateFinalEnd = LocalDate.parse(finalEnd);

      earliestBegin = earliestBegin.map(earliest -> localDateFinalBegin.isBefore(earliest) ? localDateFinalBegin : earliest)
          .or(() -> of(localDateFinalBegin));
      latestEnd = latestEnd.map(latest -> localDateFinalEnd.isAfter(latest) ? localDateFinalEnd : latest)
          .or(() -> of(localDateFinalEnd));
    }
  }
  earliestBegin.ifPresent(date -> document.addField(edmLabelDateBegin.toString(), date.toString()));
  latestEnd.ifPresent(date -> document.addField(edmLabelDateEnd.toString(), date.toString()));
}

private List<AggregationImpl> getDataProviderAggregations(FullBeanImpl fullBean) {
List<String> proxyInResult = fullBean.getProxies().stream()
.filter(not(ProxyImpl::isEuropeanaProxy))
.filter(proxy -> ArrayUtils.isEmpty(proxy.getLineage())).map(ProxyImpl::getProxyIn)
.map(Arrays::asList).flatMap(List::stream).collect(Collectors.toList());
.map(Arrays::asList).flatMap(List::stream).toList();

return fullBean.getAggregations().stream().filter(x -> proxyInResult.contains(x.getAbout()))
.collect(Collectors.toList());
return fullBean.getAggregations().stream().filter(x -> proxyInResult.contains(x.getAbout())).toList();
}
}
Loading