Skip to content

Commit

Permalink
Support auto-complete suggestions created from multiple fields
Browse files Browse the repository at this point in the history
Like in lobid-gnd: configure fields to use in the `format` param
(e.g. `json:title,contribution`), or use `format=json:suggest`
for default fields.

See https://jira.hbz-nrw.de/browse/RPB-141
  • Loading branch information
fsteeg committed Oct 7, 2024
1 parent 89105d3 commit d434220
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 45 deletions.
151 changes: 110 additions & 41 deletions web/app/controllers/resources/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -24,6 +25,8 @@
import java.util.concurrent.Callable;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
Expand All @@ -48,6 +51,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

Expand Down Expand Up @@ -103,7 +107,8 @@ public class Application extends Controller {
public final static Config CONFIG =
ConfigFactory.parseFile(RESOURCES_CONF).resolve();
public final static String MARC_XML_API = CONFIG.getString("mrcx.api");

/** Value delimiter for multiple values in suggest responses. */
public static final String VALUE_DELIMITER = "; ";

static Form<String> queryForm = Form.form(String.class);

Expand Down Expand Up @@ -293,8 +298,8 @@ private static Promise<Result> createResult(final String q,
queryDetails)).as("application/rss+xml");
default:
return responseFormat.startsWith("json:")
? withCallback(
toSuggestions(json, responseFormat.split(":")[1]))
? withCallback(Json.parse(
toSuggestions(json, responseFormat.split(":")[1])))
: responseFor(withQueryMetadata(json, index),
Accept.Format.JSON_LD.queryParamString);
}
Expand Down Expand Up @@ -366,49 +371,113 @@ private static Status withCallback(final JsonNode json) {
final String[] callback =
request() == null || request().queryString() == null ? null
: request().queryString().get("callback");
return callback != null ? ok(String.format("/**/%s(%s)", callback[0], json))
.as("application/javascript; charset=utf-8") : ok(json);
}

private static JsonNode toSuggestions(JsonNode json, String field) {
Stream<JsonNode> documents = StreamSupport
.stream(Spliterators.spliteratorUnknownSize(json.elements(), 0), false);
Stream<JsonNode> suggestions = documents.flatMap((JsonNode document) -> {
Stream<JsonNode> nodes = fieldValues(field, document);
return nodes.map((JsonNode node) -> {
boolean isTextual = node.isTextual();
Optional<JsonNode> label = isTextual ? Optional.ofNullable(node)
: findValueOptional(node, "label");
Optional<JsonNode> id = isTextual ? getOptional(document, "id")
: findValueOptional(node, "id");
Optional<JsonNode> type = isTextual ? getOptional(document, "type")
: findValueOptional(node, "type");
JsonNode types = type.orElseGet(() -> Json.toJson(new String[] { "" }));
String typeText = types.elements().next().textValue();
return Json.toJson(ImmutableMap.of(//
"label", label.orElseGet(() -> Json.toJson("")), //
"id", id.orElseGet(() -> label.orElseGet(() -> Json.toJson(""))), //
"category",
typeText.equals("BibliographicResource")
? Lobid.typeLabel(Json.fromJson(types, List.class))
: typeText));
});
return callback != null
? ok(String.format("/**/%s(%s)", callback[0], json))
.as("application/javascript; charset=utf-8")
: ok(Json.prettyPrint(json)).as("application/json; charset=utf-8");
}

/**
 * Create auto-complete suggestions for the given documents.
 *
 * @param json The documents; each element of this node yields one
 *          suggestion (duplicates are removed)
 * @param labelFields Comma-separated field names or field templates used
 *          to build the labels, or {@code suggest} for the default fields
 * @return A JSON array string with one object per suggestion, each having
 *         the keys {@code label}, {@code id}, and {@code category}
 */
static String toSuggestions(JsonNode json, String labelFields) {
  String fields = labelFields.equals("suggest")
      ? String.join(",", "title", "contribution", "medium",
          "startDate-endDate")
      : labelFields;
  Stream<JsonNode> documents = Lists.newArrayList(json.elements()).stream();
  Stream<JsonNode> suggestions = documents.map((JsonNode document) -> {
    Optional<JsonNode> id = getOptional(document, "id");
    Optional<JsonNode> type = getOptional(document, "type");
    // One label part per configured field; multiple values of a single
    // field are joined with the shared value delimiter:
    Stream<String> labels = Arrays.stream(fields.split(","))
        .map(String::trim).map(field -> fieldValues(field, document)
            .collect(Collectors.joining(VALUE_DELIMITER)));
    // No `type` in the document means no categories:
    List<String> categories =
        type.map(t -> Lists.newArrayList(t.elements()).stream())
            .orElseGet(Stream::empty).map(JsonNode::asText)
            .filter(t -> !t.equals("BibliographicResource"))
            .collect(Collectors.toList());
    return Json.toJson(toSuggestionsMap(id, labels, categories));
  });
  return Json.toJson(suggestions.distinct().collect(Collectors.toList()))
      .toString();
}

/**
 * Build the map for a single suggestion.
 *
 * @param id The ID of the suggested document; an empty JSON string is used
 *          if absent
 * @param labels The label parts; parts that are empty after trimming are
 *          dropped, the rest is joined with {@code " | "}
 * @param categories The categories; sorted and joined with {@code " | "}
 * @return A map with the keys {@code label}, {@code id}, {@code category}
 */
private static Map<String, Object> toSuggestionsMap(Optional<JsonNode> id,
    Stream<String> labels, List<String> categories) {
  // Plain HashMap instead of double-brace initialization: no anonymous
  // subclass, no serialization warning to suppress.
  Map<String, Object> suggestion = new HashMap<>();
  suggestion.put("label", labels.filter(t -> !t.trim().isEmpty())
      .collect(Collectors.joining(" | ")));
  suggestion.put("id", id.orElseGet(() -> Json.toJson("")));
  suggestion.put("category",
      categories.stream().sorted().collect(Collectors.joining(" | ")));
  return suggestion;
}

private static Stream<JsonNode> fieldValues(String field, JsonNode document) {
return document.findValues(field).stream().flatMap((node) -> {
return node.isArray()
? StreamSupport.stream(
Spliterators.spliteratorUnknownSize(node.elements(), 0), false)
: Arrays.asList(node).stream();
});
/** Matches one `label_fieldName` template part, e.g. `since_startDate`. */
private static final Pattern LABEL_FIELD_TEMPLATE =
    Pattern.compile("([^_]+)_([A-Za-z]+)");

/**
 * Get the label values for a field specification from a document.
 *
 * The specification is resolved in this order:
 * <ol>
 * <li>a plain field name, e.g. `title`: all its values;
 * <li>a `label_fieldName` template, e.g. `ab_startDate bis_endDate`: every
 * template part is replaced by the label, a space, and the field's values
 * (joined with {@link #VALUE_DELIMITER}), or removed if the field has no
 * values;
 * <li>a date range, e.g. `startDate-endDate`: the two values joined with an
 * en dash.
 * </ol>
 *
 * @param f The field specification
 * @param document The document to get the values from
 * @return The values, or an empty stream if nothing was found
 */
private static Stream<String> fieldValues(String f, JsonNode document) {
  // standard case: `f` is a plain field name, use that:
  List<String> plainValues = flatStrings(document.findValues(f));
  if (!plainValues.isEmpty()) {
    return plainValues.stream();
  }
  // `label_fieldName` template, e.g. `since_startDate`
  if (f.contains("_")) {
    Matcher matcher = LABEL_FIELD_TEMPLATE.matcher(f);
    StringBuffer expanded = new StringBuffer();
    while (matcher.find()) {
      String label = matcher.group(1);
      List<JsonNode> found = document.findValues(matcher.group(2));
      String replacement = found.isEmpty() ? ""
          : label + " " + String.join(VALUE_DELIMITER, flatStrings(found));
      // Replace exactly this match; quoting keeps `$` and `\` in values
      // literal, and inserted values are never re-scanned as templates.
      matcher.appendReplacement(expanded,
          Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(expanded);
    String text = expanded.toString();
    return text.trim().isEmpty() ? Stream.empty() : Stream.of(text);
  }
  // date ranges, e.g. `startDate-endDate`
  if (f.contains("-")) {
    // The spec comes from the request's `format` parameter, so it may be
    // malformed (e.g. `title-` or `-`): treat a missing side as empty
    // instead of failing on the array access.
    String[] fields = f.split("-");
    String v1 = fields.length > 0 ? year(document.findValue(fields[0])) : "";
    String v2 = fields.length > 1 ? year(document.findValue(fields[1])) : "";
    return v1.isEmpty() && v2.isEmpty() ? Stream.empty()
        : Stream.of(String.format("%s–%s", v1, v2));
  }
  return Stream.empty();
}

/**
 * Flatten the given nodes (array nodes contribute their elements, see
 * `toArray`) and convert each resulting node to its string form (see
 * `toString`).
 *
 * @param values The nodes to flatten and convert
 * @return The strings, in encounter order
 */
private static List<String> flatStrings(List<JsonNode> values) {
  Stream<JsonNode> flattened = values.stream().flatMap(Application::toArray);
  Stream<String> texts = flattened.map(value -> toString(value));
  return texts.collect(Collectors.toList());
}

/**
 * @param node A JSON node
 * @return The elements of the node if it is an array, else the node itself
 *         as a single-element stream
 */
private static Stream<JsonNode> toArray(JsonNode node) {
  if (!node.isArray()) {
    return Stream.of(node);
  }
  return Lists.newArrayList(node.elements()).stream();
}

/**
 * Convert a node to its label text: the text of a textual node, else the
 * text of the first `label` value found in the node, else the empty string.
 * Full dates are shortened by `year`.
 *
 * @param node The node to convert
 * @return The label text for the node
 */
private static String toString(JsonNode node) {
  JsonNode labelNode = node.isTextual() ? node : node.findValue("label");
  String text = labelNode == null ? "" : labelNode.asText();
  return year(text);
}

/**
 * @param node A JSON node, may be null
 * @return The empty string if the node is null or not textual, else the
 *         result of `year(String)` for the node's text
 */
private static String year(JsonNode node) {
  boolean textual = node != null && node.isTextual();
  return textual ? year(node.asText()) : "";
}

private static Optional<JsonNode> findValueOptional(JsonNode json,
String field) {
return Optional.ofNullable(json.findValue(field));
/** A full date in the form `yyyy-MM-dd`, e.g. `2024-10-07`. */
private static final Pattern ISO_DATE =
    Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

/**
 * Shorten a full date to its year.
 *
 * @param text The text to shorten, must not be null
 * @return The 4-digit year if the text is a full `yyyy-MM-dd` date, else
 *         the unchanged text
 */
private static String year(String text) {
  // Precompiled pattern: this runs once per value of every suggestion.
  return ISO_DATE.matcher(text).matches() ? text.substring(0, 4) : text;
}

private static Optional<JsonNode> getOptional(JsonNode json, String field) {
Expand Down
9 changes: 5 additions & 4 deletions web/app/views/api.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ <h2 id='bulk_downloads'>Bulk-Downloads <small><a href='#bulk_downloads'><span cl
<p>Siehe auch diesen Abschnitt zu <a href="https://blog.lobid.org/2018/07/02/lobid-update.html#bulk-downloads">Bulk-Downloads in unserem Blog</a>.</p>

<h2 id="auto-complete">Autovervollständigung <small><a href='#auto-complete'><span class='glyphicon glyphicon-link'></span></a></small></h2>
<p>Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung aus einem angegebenen Feld:</p>
@desc("Titel vorschlagen: \"format=json:title\"", resources.routes.Application.query("title:Werth", format="json:title"))
@desc("Mitwirkende vorschlagen: \"format=json:agent\"", resources.routes.Application.query("contribution.agent.label:Hein", format="json:agent"))
@desc("Schlagwort vorschlagen: \"format=json:subject\"", resources.routes.Application.query("subject.componentList.label:Pferd", format="json:subject"))
<p>Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung.</p>
@desc("Standardformat für Vorschläge verwenden: \"format=json:suggest\"", resources.routes.Application.query("Twain", format="json:suggest"))
@desc("Bestimmtes Feld für Vorschläge verwenden: \"format=json:title\"", resources.routes.Application.query("Twain", format="json:title"))
@desc("Vorschläge aus mehreren Feldern zusammenbauen: \"format=json:title,contribution\"", resources.routes.Application.query("Twain", format="json:title,contribution"))
@desc("Feld-Templates zur Anpassung und Gruppierung: \"format=json:title,ab_startDate bis_endDate\"", resources.routes.Application.query("publication.endDate:*+AND+Twain", format="json:title,ab_startDate bis_endDate"))
<p>Damit kann z.B. eine Autovervollständigung umgesetzt werden, bei der zur Suche an Stelle des gewählten Labels die entsprechende ID verwendet werden kann:</p>
<p><form method="GET" class="form-inline" action="/resources/search"> <!-- use full URL in your code, i.e. https://lobid.org/resources/search -->
<input type="text" class="search-resources" id="label" style="width:350px" placeholder="Suchbegriff für Vorschläge eingeben"/>
Expand Down
126 changes: 126 additions & 0 deletions web/test/tests/SuggestionsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package tests;

import static org.hamcrest.CoreMatchers.allOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static play.test.Helpers.GET;
import static play.test.Helpers.contentAsString;
import static play.test.Helpers.fakeApplication;
import static play.test.Helpers.fakeRequest;
import static play.test.Helpers.route;
import static play.test.Helpers.running;

import org.junit.Test;

import play.Application;
import play.libs.Json;
import play.mvc.Result;

/**
 * Tests for the auto-complete suggestion responses of the search route
 * (see {@link controllers.resources.Application}).
 */
@SuppressWarnings("javadoc")
public class SuggestionsTest extends LocalIndexSetup {

  /** Route a GET request for the given URI in the given application. */
  private static Result search(Application application, String uri) {
    return route(application, fakeRequest(GET, uri));
  }

  /** Labels built from two fields are joined with ` | `. */
  @Test
  public void suggestionsWithoutCallback() {
    Application app = fakeApplication();
    running(app, () -> {
      Result response = search(app,
          "/resources/search?q=*&filter=type:Book&format=json:title,contribution");
      assertNotNull("We have a result", response);
      assertThat(response.contentType(), equalTo("application/json"));
      String body = contentAsString(response);
      assertNotNull("We can parse the result as JSON", Json.parse(body));
      assertThat(body, allOf(//
          containsString("label"), //
          containsString("id"), //
          containsString("category")));
      boolean anyCombinedLabel = Json.parse(body).findValues("label").stream()
          .anyMatch(label -> label.asText().contains(" | "));
      assertTrue("We used both given fields for any of the labels",
          anyCombinedLabel);
    });
  }

  /** With a `callback` parameter, the response is wrapped as JavaScript. */
  @Test
  public void suggestionsWithCallback() {
    Application app = fakeApplication();
    running(app, () -> {
      Result response = search(app,
          "/resources/search?q=*&filter=type:Book&format=json:title&callback=test");
      assertNotNull("We have a result", response);
      assertThat(response.contentType(), equalTo("application/javascript"));
      String body = contentAsString(response);
      assertThat(body, allOf(//
          containsString("test("), // callback
          containsString("label"), //
          containsString("id"), //
          containsString("category")));
    });
  }

  /** Suggestion responses allow cross-origin access. */
  @Test
  public void suggestionsCorsHeader() {
    Application app = fakeApplication();
    running(app, () -> {
      Result response = search(app, "/resources/search?q=*&format=json:title");
      assertNotNull("We have a result", response);
      assertThat(response.header("Access-Control-Allow-Origin"),
          equalTo("*"));
    });
  }

  /** Field templates are replaced with the label and the field values. */
  @Test
  public void suggestionsTemplate() {
    Application app = fakeApplication();
    running(app, () -> {
      String format = "json:title,ab_startDate+als_edition";
      Result response = search(app,
          "/resources/search?q=*&filter=type:Book&format=" + format);
      assertNotNull("We have a result", response);
      assertThat(response.contentType(), equalTo("application/json"));
      String body = contentAsString(response);
      assertNotNull("We can parse the result as JSON", Json.parse(body));
      boolean anyTemplateLabel = Json.parse(body).findValues("label").stream()
          .anyMatch(label -> label.asText().contains("als "));
      assertTrue(
          "We replaced the field names in the template with their values",
          anyTemplateLabel);
    });
  }

  /** Multiple values of a template field are joined with `; `. */
  @Test
  public void suggestionsTemplateMultiValues() {
    Application app = fakeApplication();
    running(app, () -> {
      String format = "json:title,contribution,about_subject";
      Result response = search(app,
          "/resources/search?q=Volksschulwesens&filter=type:Book&format="
              + format);
      assertNotNull("We have a result", response);
      assertThat(response.contentType(), equalTo("application/json"));
      String body = contentAsString(response);
      assertNotNull("We can parse the result as JSON", Json.parse(body));
      assertThat("Multi-values use consistent delimiter", body, allOf(
          containsString("Handwörterbuch des Volksschulwesens"),
          containsString("about Erziehung, Bildung, Unterricht; Volksschule")));
    });
  }

  /** Responses without a callback are pretty-printed JSON. */
  @Test
  public void suggestionsArePrettyPrinted() {
    Application app = fakeApplication();
    running(app, () -> {
      Result response =
          search(app, "/resources/search?q=*&format=json:suggest");
      assertNotNull(response);
      assertThat(response.contentType(), equalTo("application/json"));
      assertThat(contentAsString(response), containsString("}, {\n"));
    });
  }

}

0 comments on commit d434220

Please sign in to comment.