Skip to content

Commit

Permalink
Added support for *_drsim fields. Fixed pagination and statistics bug.
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanmrsulja committed Oct 28, 2024
1 parent 14422f5 commit 370946e
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.utils.http.ESHttpsBasicClientFactory;
Expand Down Expand Up @@ -50,6 +52,9 @@ private void addDocument(SearchInputDocument doc)
try {
Map<String, List<Object>> map = convertDocToMap(doc);
String json = new ObjectMapper().writeValueAsString(map);
if (json.contains("_drsim")) {
json = reformatDRSIMFields(json);
}
log.debug("Adding document for '" + doc.getField("DocId") + "': "
+ json);

Expand Down Expand Up @@ -80,6 +85,28 @@ private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
return map;
}

/**
 * Matches a single-valued JSON array whose only element is a date-range
 * string, exactly as it appears in compact JSON output:
 * {@code ["[2020-01-01T00:00:00Z TO 2021-01-01T00:00:00Z]"]}.
 * Group 1 is the start timestamp, group 2 the end timestamp.
 *
 * Anchoring on the surrounding {@code ["} and {@code "]} keeps the rewrite
 * from touching ranges embedded in other text or unrelated fields.
 * Compiled once because Pattern is immutable and thread-safe.
 */
private static final Pattern DRSIM_RANGE_ARRAY_PATTERN = Pattern.compile(
        "\\[\"\\[(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z) TO "
                + "(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z)\\]\"\\]");

/**
 * Rewrites date-range values in a serialized document from the bracketed
 * string form into a JSON object with "gte" and "lte" keys.
 *
 * For example {@code "x_drsim":["[A TO B]"]} becomes
 * {@code "x_drsim":{"gte": "A", "lte": "B"}}.
 * NOTE(review): presumably this is the shape the search index expects for
 * the *_drsim range fields -- confirm against the index mapping.
 *
 * Only a whole array token holding exactly one range string is rewritten.
 * Everything else -- including other fields whose values happen to start
 * with '{' or end with '}', and range-like text inside longer strings --
 * is returned untouched, so the result stays valid JSON. Arrays holding
 * more than one range string are also left as they are.
 *
 * @param json the compact JSON text of the document
 * @return the JSON text with matching range arrays replaced by objects
 */
private String reformatDRSIMFields(String json) {
    // '{' and '"' are literal in a replacement string; only '$' and '\'
    // are special, and $1/$2 insert the captured timestamps.
    return DRSIM_RANGE_ARRAY_PATTERN.matcher(json)
            .replaceAll("{\"gte\": \"$1\", \"lte\": \"$2\"}");
}

private void putToElastic(String json, String docId)
throws SearchEngineException {
try {
Expand All @@ -96,8 +123,13 @@ private void putToElastic(String json, String docId)
request.addHeader("Content-Type", "application/json");
request.setEntity(new StringEntity(json, "UTF-8"));
HttpResponse response = httpClient.execute(request);
log.debug("Response from Elasticsearch: "
if (response.getStatusLine().getStatusCode() >= 400) {
log.warn("Response from Elasticsearch: "
+ EntityUtils.toString(response.getEntity()));
} else {
log.debug("Response from Elasticsearch: "
+ EntityUtils.toString(response.getEntity()));
}
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ private void deleteById(String id) throws SearchEngineException {

public void deleteByQuery(String queryString) throws SearchEngineException {
String url = baseUrl + "/_delete_by_query";
SearchQuery query = new BaseSearchQuery().setQuery(queryString.replace("[", "").replace("]", "").replace("*", "0"));
queryString = queryString.replace(" ", "");
if (queryString.contains("*TO")) {
queryString = queryString.replace("[", "").replace("]", "").replace("*", "0");
}
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
String queryJson = new QueryConverter(query).asString();

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public Iterator<SearchResultDocument> iterator() {

@Override
public long getNumFound() {
return documents.size();
return numberFound;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ private static SearchType decideQueryType(String value) {

if (value.startsWith("\"") && value.endsWith("\"")) {
searchType = SearchType.PHRASE;
} else if (value.contains("TO")) {
} else if (value.contains("TO") && !value.equals("TO")) {
if (value.equals("[*TO*]")) {
searchType = SearchType.EXISTS;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -117,6 +118,7 @@ private void parseDocumentList() {
log.warn("Didn't find a 'hits.total.value' field in the query response: " + responseMap);
return;
}
totalHits = total;

@SuppressWarnings("unchecked")
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits.get("hits");
Expand Down Expand Up @@ -145,8 +147,7 @@ private void parseDocuments(List<Map<String, Object>> hits) {

private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
@SuppressWarnings("unchecked")
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
.get("_source");
Map<String, Object> sourceMap = (Map<String, Object>) hitMap.get("_source");
if (sourceMap == null) {
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
return null;
Expand All @@ -158,7 +159,22 @@ private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
return null;
}

return new BaseSearchResultDocument(id, sourceMap);
Map<String, Collection<Object>> parsedSourceMap = new HashMap<>();
for (Map.Entry<String, Object> entry : sourceMap.entrySet()) {
Object value = entry.getValue();
if (value instanceof Collection) {
parsedSourceMap.put(entry.getKey(), (Collection<Object>) value);
} else if (value instanceof Map) {
// This is done assuming the only "Map" field will be a _drsim field
parsedSourceMap.put(entry.getKey(), Collections.singletonList(
((Map<String, String>) value).get("gte") + " TO " + ((Map<String, String>) value).get("lte"))
);
} else {
parsedSourceMap.put(entry.getKey(), Collections.singletonList(value));
}
}

return new BaseSearchResultDocument(id, parsedSourceMap);
}

private Map<String, List<String>> parseHighlight(
Expand Down

0 comments on commit 370946e

Please sign in to comment.