From 7c483b4b58777d8e6269860c4953dc62540438f8 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 1 Jan 2025 22:30:00 +0100 Subject: [PATCH] #5211 - AI assistant prototype - Fix displaying indexing progress on the bulk processing page - Added re-index button to assistant sidebar - Normalize embeddings and use dot product instead of cosine when indexing/searching - Allow suspending background tasks for a project - During project import and project initialization from a template, suspend background tasks - During indexing, embed in batches - Fix timing logging for embeddings - Allow auto-detecting the embedding dimensions - Allow using embeddings with a dimension higher than 1024 --- inception/inception-assistant/pom.xml | 4 + .../inception/assistant/AssistantService.java | 2 +- .../assistant/AssistantServiceImpl.java | 26 +-- .../AssistantWebsocketControllerImpl.java | 2 +- .../config/AssistantAutoConfiguration.java | 27 ++- .../config/AssistantChatProperties.java | 2 + .../config/AssistantEmbeddingProperties.java | 12 ++ .../assistant/config/AssistantProperties.java | 2 + .../config/AssistantPropertiesImpl.java | 75 ++++++++ .../assistant/index/DocumentQueryService.java | 2 + .../index/DocumentQueryServiceImpl.java | 44 ++--- .../assistant/index/EmbeddingService.java | 32 ++++ .../assistant/index/EmbeddingServiceImpl.java | 173 ++++++++++++++++++ .../index/HighDimensionLucene99Codec.java | 78 ++++++++ .../index/UpdateDocumentIndexTask.java | 80 ++++++-- .../assistant/sidebar/AssistantSidebar.html | 5 + .../assistant/sidebar/AssistantSidebar.java | 16 +- .../userguide/UserGuideIndexingTask.java | 13 +- .../userguide/UserGuideQueryServiceImpl.java | 45 ++--- .../assistant/AssistantServiceImplTest.java | 13 +- .../UserGuideQueryServiceImplTest.java | 12 +- .../src/test/resources/log4j2-test.xml | 2 +- inception/inception-documents/pom.xml | 4 + .../documents/DocumentStateWatcher.java | 1 + .../imls/llm/ollama/client/OllamaClient.java | 5 +- .../llm/ollama/client/OllamaClientImpl.java | 38 ++-- .../llm/ollama/client/OllamaEmbedRequest.java | 5 +- .../ollama/client/OllamaEmbedResponse.java | 7 +- .../llm/ollama/client/OllamaMetricsImpl.java | 2 - .../OllamaRecommenderAutoConfiguration.java | 11 +- inception/inception-project-export/pom.xml | 4 + .../export/ProjectExportServiceImpl.java | 53 +++--- ...ProjectExportServiceAutoConfiguration.java | 6 +- .../webanno/project/ProjectServiceImpl.java | 43 ++--- .../ukp/inception/scheduling/MatchResult.java | 2 +- .../scheduling/NotifyingTaskMonitor.java | 8 +- .../scheduling/SchedulingService.java | 14 ++ .../scheduling/SchedulingServiceImpl.java | 135 ++++++++++++-- .../ukp/inception/scheduling/Task.java | 13 +- .../ukp/inception/scheduling/TaskMonitor.java | 24 ++- .../SchedulerWebsocketController.java | 2 +- .../inception/search/SearchServiceImpl.java | 24 ++- .../tasks/IndexingTask_ImplBase.java | 14 +- .../search/scheduling/tasks/ReindexTask.java | 11 +- inception/inception-ui-dashboard/pom.xml | 4 + .../ProjectTemplateSelectionDialogPanel.java | 12 +- .../SchedulerWebsocketControllerImpl.java | 6 +- .../event/MatrixWorkloadStateWatcher.java | 2 +- inception/inception-workload/pom.xml | 4 + .../WorkloadManagementAutoConfiguration.java | 2 +- .../model/WorkloadManagementServiceImpl.java | 2 +- .../RecalculateProjectStateTask.java | 24 ++- .../task}/UpdateProjectStateTask.java | 28 ++- .../exporter/ExportedWorkloadManager.java | 2 +- .../exporter/WorkloadManagerExporter.java | 2 +- .../exporter/WorkloadManagerExporterTest.java | 2 +- 56 files changed, 948 insertions(+), 235 deletions(-) create mode 100644 inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingService.java create mode 100644 inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingServiceImpl.java create mode 100644 inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/HighDimensionLucene99Codec.java rename inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/{event => task}/RecalculateProjectStateTask.java (77%) rename inception/{inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents => inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task}/UpdateProjectStateTask.java (76%) rename inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/{event => task}/exporter/ExportedWorkloadManager.java (96%) rename inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/{event => task}/exporter/WorkloadManagerExporter.java (98%) diff --git a/inception/inception-assistant/pom.xml b/inception/inception-assistant/pom.xml index 8b1c68699e8..fa9aa8fee1a 100644 --- a/inception/inception-assistant/pom.xml +++ b/inception/inception-assistant/pom.xml @@ -105,6 +105,10 @@ org.apache.commons commons-lang3 + + org.apache.commons + commons-collections4 + commons-io commons-io diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantService.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantService.java index 89ae4df7e98..b37d18af94c 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantService.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantService.java @@ -24,7 +24,7 @@ public interface AssistantService { - List listMessages(String aSessionOwner, Project aProject); + List getConversationMessages(String aSessionOwner, Project aProject); void processUserMessage(String aSessionOwner, Project aProject, MAssistantMessage aMessage); diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImpl.java index 1d14a576031..ddce9734e00 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImpl.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImpl.java @@ -47,9 +47,7 @@ import org.springframework.security.core.session.SessionDestroyedEvent; import org.springframework.security.core.session.SessionRegistry; -import com.knuddels.jtokkit.Encodings; import com.knuddels.jtokkit.api.EncodingRegistry; -import com.knuddels.jtokkit.api.EncodingType; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.security.model.User; @@ -77,13 +75,13 @@ public class AssistantServiceImpl private final AssistantProperties properties; private final UserGuideQueryService documentationIndexingService; private final DocumentQueryService documentQueryService; - - private final EncodingRegistry registry = Encodings.newLazyEncodingRegistry(); + private final EncodingRegistry encodingRegistry; public AssistantServiceImpl(SessionRegistry aSessionRegistry, SimpMessagingTemplate aMsgTemplate, OllamaClient aOllamaClient, AssistantProperties aProperties, UserGuideQueryService aDocumentationIndexingService, - DocumentQueryService aDocumentQueryService) + DocumentQueryService aDocumentQueryService, + EncodingRegistry aEncodingRegistry) { sessionRegistry = aSessionRegistry; msgTemplate = aMsgTemplate; @@ -92,6 +90,7 @@ public AssistantServiceImpl(SessionRegistry aSessionRegistry, properties = aProperties; documentationIndexingService = aDocumentationIndexingService; documentQueryService = aDocumentQueryService; + encodingRegistry = aEncodingRegistry; } // Set order so this is handled before session info is removed from sessionRegistry @@ -132,7 +131,7 @@ public void onAfterProjectRemoved(AfterProjectRemovedEvent aEvent) } @Override - public List listMessages(String aSessionOwner, Project aProject) + public List getConversationMessages(String aSessionOwner, Project aProject) { var state = getState(aSessionOwner, aProject); return state.getMessages(); @@ -161,14 +160,17 @@ public void processUserMessage(String aSessionOwner, Project aProject, try { var systemMessages = generateSystemMessages(aSessionOwner, aProject, aMessage); var transientMessages = generateTransientMessages(aSessionOwner, aProject, aMessage); - var recentMessages = listMessages(aSessionOwner, aProject); + var recentMessages = getConversationMessages(aSessionOwner, aProject); // We record the message only now to ensure it is not included in the listMessages above recordMessage(aSessionOwner, aProject, aMessage); - // For testing purposes we send this message to the UI - for (var msg : transientMessages) { - dispatchMessage(aSessionOwner, aProject, msg); + if (properties.isDevMode()) { + // For testing purposes we send this message to the UI but do not record it as + // part of the conversation + for (var msg : transientMessages) { + dispatchMessage(aSessionOwner, aProject, msg); + } } var conversation = limitConversationToContextLength(systemMessages, transientMessages, @@ -324,7 +326,9 @@ private List limitConversationToContextLength( // the tokenizer we use counts fewer tokens than the one user by // the model and also to cover for message encoding JSON overhead, // we try to use only 90% of the context window. - var encoding = registry.getEncoding(EncodingType.CL100K_BASE); + var encoding = encodingRegistry.getEncoding(properties.getChat().getEncoding()) + .orElseThrow(() -> new IllegalStateException( + "Unknown encoding: " + properties.getChat().getEncoding())); var limit = floorDiv(aContextLength * 90, 100); var headMessages = new ArrayList(); diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantWebsocketControllerImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantWebsocketControllerImpl.java index 050e45dfe03..01ed0a729fa 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantWebsocketControllerImpl.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/AssistantWebsocketControllerImpl.java @@ -68,7 +68,7 @@ public List onSubscribeToAssistantMessages(SimpMessageHeaderA throws IOException { var project = projectService.getProject(aProjectId); - return assistantService.listMessages(aPrincipal.getName(), project); + return assistantService.getConversationMessages(aPrincipal.getName(), project); } @MessageMapping(PROJECT_ASSISTANT_TOPIC_TEMPLATE) diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantAutoConfiguration.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantAutoConfiguration.java index e3119a57f9e..f947392ea0c 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantAutoConfiguration.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantAutoConfiguration.java @@ -25,10 +25,15 @@ import org.springframework.messaging.simp.SimpMessagingTemplate; import org.springframework.security.core.session.SessionRegistry; +import com.knuddels.jtokkit.Encodings; +import com.knuddels.jtokkit.api.EncodingRegistry; + import de.tudarmstadt.ukp.inception.assistant.AssistantService; import de.tudarmstadt.ukp.inception.assistant.AssistantServiceImpl; import de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryService; import de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryServiceImpl; +import de.tudarmstadt.ukp.inception.assistant.index.EmbeddingService; +import de.tudarmstadt.ukp.inception.assistant.index.EmbeddingServiceImpl; import de.tudarmstadt.ukp.inception.assistant.sidebar.AssistantSidebarFactory; import de.tudarmstadt.ukp.inception.assistant.userguide.UserGuideQueryService; import de.tudarmstadt.ukp.inception.assistant.userguide.UserGuideQueryServiceImpl; @@ -47,10 +52,10 @@ public class AssistantAutoConfiguration public AssistantService assistantService(SessionRegistry aSessionRegistry, SimpMessagingTemplate aMsgTemplate, OllamaClient aOllamaClient, AssistantProperties aProperties, UserGuideQueryService aDocumentationIndexingService, - DocumentQueryService aDocumentQueryService) + DocumentQueryService aDocumentQueryService, EncodingRegistry aEncodingRegistry) { return new AssistantServiceImpl(aSessionRegistry, aMsgTemplate, aOllamaClient, aProperties, - aDocumentationIndexingService, aDocumentQueryService); + aDocumentationIndexingService, aDocumentQueryService, aEncodingRegistry); } @Bean @@ -61,18 +66,28 @@ public AssistantSidebarFactory assistantSidebarFactory() @Bean public UserGuideQueryService userManualQueryService(AssistantProperties aProperties, - SchedulingService aSchedulingService, OllamaClient aOllamaClient) + SchedulingService aSchedulingService, EmbeddingService aEmbeddingService) { - return new UserGuideQueryServiceImpl(aProperties, aSchedulingService, aOllamaClient); + return new UserGuideQueryServiceImpl(aProperties, aSchedulingService, aEmbeddingService); + } + + @Bean + public EncodingRegistry encodingRegistry() { + return Encodings.newLazyEncodingRegistry(); + } + + @Bean + public EmbeddingService EmbeddingService(AssistantProperties aProperties, OllamaClient aOllamaClient) { + return new EmbeddingServiceImpl(aProperties, aOllamaClient); } @Bean public DocumentQueryService documentQueryService(AssistantProperties aProperties, RepositoryProperties aRepositoryProperties, AssistantDocumentIndexProperties aIndexProperties, SchedulingService aSchedulingService, - OllamaClient aOllamaClient) + OllamaClient aOllamaClient, EmbeddingService aEmbeddingService) { return new DocumentQueryServiceImpl(aProperties, aRepositoryProperties, aIndexProperties, - aSchedulingService, aOllamaClient); + aSchedulingService, aEmbeddingService); } } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantChatProperties.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantChatProperties.java index e480aafef0f..1d62c1d31bc 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantChatProperties.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantChatProperties.java @@ -30,4 +30,6 @@ public interface AssistantChatProperties double getTemperature(); int getContextLength(); + + String getEncoding(); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantEmbeddingProperties.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantEmbeddingProperties.java index 58bbc6b3903..ff8326d2f62 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantEmbeddingProperties.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantEmbeddingProperties.java @@ -19,6 +19,8 @@ public interface AssistantEmbeddingProperties { + public static final int AUTO_DETECT_DIMENSION = -1; + String getModel(); double getTopP(); @@ -30,4 +32,14 @@ public interface AssistantEmbeddingProperties double getTemperature(); int getSeed(); + + int getContextLength(); + + int getBatchSize(); + + String getEncoding(); + + int getDimension(); + + void setDimension(int aI); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantProperties.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantProperties.java index b7f8e9a0818..ff4637c894b 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantProperties.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantProperties.java @@ -30,4 +30,6 @@ public interface AssistantProperties AssistantChatProperties getChat(); AssistantEmbeddingProperties getEmbedding(); + + boolean isDevMode(); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantPropertiesImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantPropertiesImpl.java index 9a75e92b336..b658e2d74d7 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantPropertiesImpl.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/config/AssistantPropertiesImpl.java @@ -17,6 +17,7 @@ */ package de.tudarmstadt.ukp.inception.assistant.config; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.ConfigurationProperties; @ConfigurationProperties("assistant") @@ -32,6 +33,20 @@ public class AssistantPropertiesImpl private AssistantChatProperties chat = new AssistantChatPropertiesImpl(); private AssistantEmbeddingProperties embedding = new AssistantEmbeddingPropertiesImpl(); + @Value("${inception.dev:false}") // Inject system property or use default if not provided + private boolean devMode; + + @Override + public boolean isDevMode() + { + return devMode; + } + + public void setDevMode(boolean aDevMode) + { + devMode = aDevMode; + } + @Override public String getUrl() { @@ -108,6 +123,7 @@ public static class AssistantChatPropertiesImpl private double repeatPenalty = 1.1; private double temperature = 0.1; private int contextLength = 4096; + private String encoding = "cl100k_base"; @Override public String getModel() @@ -174,6 +190,17 @@ public void setContextLength(int aContextLength) { contextLength = aContextLength; } + + @Override + public String getEncoding() + { + return encoding; + } + + public void setEncoding(String aEncoding) + { + encoding = aEncoding; + } } public static class AssistantEmbeddingPropertiesImpl @@ -186,6 +213,10 @@ public static class AssistantEmbeddingPropertiesImpl private int topK = 1000; private double repeatPenalty = 1.0; private double temperature = 0.0; + private int contextLength = 768; + private int batchSize = 16; + private String encoding = "cl100k_base"; + private int dimension = AUTO_DETECT_DIMENSION; @Override public String getModel() @@ -252,5 +283,49 @@ public void setSeed(int aSeed) { seed = aSeed; } + + @Override + public int getContextLength() + { + return contextLength; + } + + public void setContextLength(int aContextLength) + { + contextLength = aContextLength; + } + + @Override + public int getBatchSize() + { + return batchSize; + } + + public void setBatchSize(int aBatchSize) + { + batchSize = aBatchSize; + } + + @Override + public String getEncoding() + { + return encoding; + } + + public void setEncoding(String aEncoding) + { + encoding = aEncoding; + } + + @Override + public int getDimension() + { + return dimension; + } + + public void setDimension(int aDimension) + { + dimension = aDimension; + } } } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryService.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryService.java index 19b17719559..e128896d360 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryService.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryService.java @@ -32,4 +32,6 @@ public interface DocumentQueryService PooledIndex borrowIndex(Project aProject) throws Exception; List query(Project aProject, String aQuery, int aTopN, double aScoreThreshold); + + void rebuildIndexAsync(Project aProject); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryServiceImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryServiceImpl.java index a46a95b26c0..5e2bc1eb7ff 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryServiceImpl.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/DocumentQueryServiceImpl.java @@ -19,6 +19,7 @@ import static de.tudarmstadt.ukp.inception.project.api.ProjectService.PROJECT_FOLDER; import static java.util.Collections.emptyList; +import static org.apache.lucene.util.VectorUtil.l2normalize; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -52,9 +53,6 @@ import de.tudarmstadt.ukp.inception.documents.event.BeforeDocumentRemovedEvent; import de.tudarmstadt.ukp.inception.project.api.event.AfterProjectRemovedEvent; import de.tudarmstadt.ukp.inception.project.api.event.BeforeProjectRemovedEvent; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaEmbedRequest; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaOptions; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; public class DocumentQueryServiceImpl @@ -66,7 +64,7 @@ public class DocumentQueryServiceImpl private final RepositoryProperties repositoryProperties; private final AssistantDocumentIndexProperties indexProperties; private final SchedulingService schedulingService; - private final OllamaClient ollamaClient; + private final EmbeddingService embeddingService; private static final String INDEX_FOLDER = "index"; private static final String ASSISTANT_FOLDER = "assistant"; @@ -77,13 +75,13 @@ public class DocumentQueryServiceImpl public DocumentQueryServiceImpl(AssistantProperties aAssistantProperties, RepositoryProperties aRepositoryProperties, AssistantDocumentIndexProperties aIndexProperties, SchedulingService aSchedulingService, - OllamaClient aOllamaClient) + EmbeddingService aEmbeddingService) { properties = aAssistantProperties; repositoryProperties = aRepositoryProperties; indexProperties = aIndexProperties; schedulingService = aSchedulingService; - ollamaClient = aOllamaClient; + embeddingService = aEmbeddingService; var indexPoolConfig = new GenericKeyedObjectPoolConfig(); // We only ever want one pooled index per project @@ -129,7 +127,7 @@ public List query(Project aProject, String aQuery, int aTopN, double aSc LOG.trace("KNN Query: [{}]", aQuery); - var queryEmbedding = getEmbedding(aQuery); + var queryEmbedding = l2normalize(embeddingService.embed(aQuery), false); var searcher = new IndexSearcher(reader); var query = new KnnFloatVectorQuery(FIELD_EMBEDDING, queryEmbedding, aTopN); @@ -157,20 +155,6 @@ else if (LOG.isTraceEnabled()) { } } - float[] getEmbedding(String aQuery) throws IOException - { - var request = OllamaEmbedRequest.builder() // - .withModel(properties.getEmbedding().getModel()) // - .withInput(aQuery) // - .withOption(OllamaOptions.TEMPERATURE, properties.getEmbedding().getTemperature()) // - .withOption(OllamaOptions.SEED, properties.getEmbedding().getSeed()) // - .withOption(OllamaOptions.TOP_P, properties.getEmbedding().getTopP()) // - .withOption(OllamaOptions.TOP_K, properties.getEmbedding().getTopK()) // - .withOption(OllamaOptions.REPEAT_PENALTY, properties.getEmbedding().getRepeatPenalty()) // - .build(); - return ollamaClient.generateEmbeddings(properties.getUrl(), request)[0]; - } - private Path getIndexDirectory(Long aProjectId) { return repositoryProperties.getPath().toPath() // @@ -182,6 +166,23 @@ private Path getIndexDirectory(Long aProjectId) .resolve(INDEX_FOLDER); } + @Override + public void rebuildIndexAsync(Project aProject) + { + try (var index = borrowIndex(aProject)) { + index.indexWriter.deleteAll(); + index.indexWriter.commit(); + + schedulingService.enqueue(UpdateDocumentIndexTask.builder() // + .withTrigger("rebuildIndexAsync") // + .withProject(aProject) // + .build()); + } + catch (Exception e) { + LOG.error("Error clearing index", e); + } + } + @EventListener public void onAfterDocumentCreated(AfterDocumentCreatedEvent aEvent) { @@ -265,6 +266,7 @@ public PooledIndex create(Long aKey) throws Exception { var dir = new MMapDirectory(getIndexDirectory(aKey)); var iwc = new IndexWriterConfig(); + iwc.setCodec(new HighDimensionLucene99Codec(embeddingService.getDimension())); return new PooledIndex(aKey, dir, new IndexWriter(dir, iwc)); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingService.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingService.java new file mode 100644 index 00000000000..d02de05e3d9 --- /dev/null +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingService.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.assistant.index; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +public interface EmbeddingService +{ + float[] embed(String aQuery) throws IOException; + + List> embed(String... aStrings) throws IOException; + + int getDimension(); +} diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingServiceImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingServiceImpl.java new file mode 100644 index 00000000000..1821a5d1af6 --- /dev/null +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/EmbeddingServiceImpl.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.assistant.index; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.context.event.ContextRefreshedEvent; +import org.springframework.context.event.EventListener; + +import de.tudarmstadt.ukp.inception.assistant.config.AssistantProperties; +import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; +import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaEmbedRequest; +import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaOptions; + +public class EmbeddingServiceImpl + implements EmbeddingService +{ + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final AssistantProperties properties; + private final OllamaClient ollamaClient; + + public EmbeddingServiceImpl(AssistantProperties aProperties, OllamaClient aOllamaClient) + { + properties = aProperties; + ollamaClient = aOllamaClient; + } + + @EventListener + public void onContextRefreshedEvent(ContextRefreshedEvent aEvent) + { + autoDetectEmbeddingDimension(); + } + + public int getDimension() { + autoDetectEmbeddingDimension(); + return properties.getEmbedding().getDimension(); + } + + @Override + public float[] embed(String aQuery) throws IOException + { + return embed(new String[] { aQuery }).get(0).getValue(); + } + + @Override + public List> embed(String... aStrings) throws IOException + { + autoDetectEmbeddingDimension(); + + var strings = new ArrayList(); + for (var s : aStrings) { + + s = removeEmptyLinesAndTrim(s); + + if (s.isEmpty() || hasHighProportionOfShortSequences(s) + || hasHighProportionOfWhitespaceOrLineBreaks(s)) { + continue; + } + + strings.add(s); + } + + var request = OllamaEmbedRequest.builder() // + .withModel(properties.getEmbedding().getModel()) // + .withInput(strings.toArray(String[]::new)) // + .withOption(OllamaOptions.TEMPERATURE, properties.getEmbedding().getTemperature()) // + .withOption(OllamaOptions.NUM_CTX, properties.getEmbedding().getContextLength()) // + .withOption(OllamaOptions.SEED, properties.getEmbedding().getSeed()) // + .withOption(OllamaOptions.TOP_P, properties.getEmbedding().getTopP()) // + .withOption(OllamaOptions.TOP_K, properties.getEmbedding().getTopK()) // + .withOption(OllamaOptions.REPEAT_PENALTY, + properties.getEmbedding().getRepeatPenalty()) // + .build(); + return ollamaClient.embed(properties.getUrl(), request); + } + + private void autoDetectEmbeddingDimension() + { + var embeddingProperties = properties.getEmbedding(); + synchronized (embeddingProperties) { + if (embeddingProperties.getDimension() <= 0) { + try { + var embedding = ollamaClient.embed(properties.getUrl(), OllamaEmbedRequest + .builder() // + .withModel(embeddingProperties.getModel()) // + .withInput( + "We just need to know the dimension of the generated embedding. Thanks!") // + .build()).get(0).getValue(); + embeddingProperties.setDimension(embedding.length); + LOG.info("Auto-detected embedding dimension of model [{}]: {}", + embeddingProperties.getModel(), embeddingProperties.getDimension()); + } + catch (Exception e) { + LOG.error("Unable to auto-detect embedding dimension - using default", e); + embeddingProperties.setDimension(1024); + } + } + } + } + + static String removeEmptyLinesAndTrim(String aString) + { + if (aString == null || aString.isEmpty()) { + return aString; + } + + return aString.lines() // + .map(String::trim).filter(line -> !line.isEmpty()) // Remove empty lines + .reduce((line1, line2) -> line1 + "\n" + line2) // Combine lines with line breaks + .orElse(""); // Default to an empty string if no lines remain + } + + static boolean hasHighProportionOfShortSequences(String aString) + { + if (aString == null || aString.isEmpty()) { + return false; + } + + // Split the string into words + var words = aString.split("\\s+"); + + // Count single and double-character words + long shortWordCount = 0; + for (var word : words) { + if (word.length() <= 2) { + shortWordCount++; + } + } + + // Consider "high proportion" to be more than 50% + var shortWordProportion = (double) shortWordCount / words.length; + return shortWordProportion > 0.5; + } + + static boolean hasHighProportionOfWhitespaceOrLineBreaks(String aString) + { + if (aString == null || aString.isEmpty()) { + return false; + } + + var totalChars = aString.length(); + + var whitespaceOrLineBreakCount = aString.chars() + .filter(c -> Character.isWhitespace(c) || c == '\n' || c == '\r').count(); + + // Consider "high proportion" to be more than 50% + double proportion = (double) whitespaceOrLineBreakCount / totalChars; + return proportion > 0.5; + } + +} diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/HighDimensionLucene99Codec.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/HighDimensionLucene99Codec.java new file mode 100644 index 00000000000..5312a783415 --- /dev/null +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/HighDimensionLucene99Codec.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.assistant.index; + +import java.io.IOException; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +public class HighDimensionLucene99Codec + extends Lucene99Codec +{ + private final KnnVectorsFormat defaultKnnVectorsFormat; + + public HighDimensionLucene99Codec(int aDimension) + { + var knnFormat = new Lucene99HnswVectorsFormat(); + defaultKnnVectorsFormat = new HighDimensionKnnVectorsFormat(knnFormat, aDimension); + } + + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) + { + return defaultKnnVectorsFormat; + } + + private static class HighDimensionKnnVectorsFormat + extends KnnVectorsFormat + { + private final KnnVectorsFormat knnFormat; + private final int maxDimensions; + + public HighDimensionKnnVectorsFormat(KnnVectorsFormat aKnnFormat, int aMaxDimensions) + { + super(aKnnFormat.getName()); + knnFormat = aKnnFormat; + maxDimensions = aMaxDimensions; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState aState) throws IOException + { + return knnFormat.fieldsWriter(aState); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState aState) throws IOException + { + return knnFormat.fieldsReader(aState); + } + + @Override + public int getMaxDimensions(String fieldName) + { + return maxDimensions; + } + } +} diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/UpdateDocumentIndexTask.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/UpdateDocumentIndexTask.java index fe25370ff8d..9adbbfab4b8 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/UpdateDocumentIndexTask.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/index/UpdateDocumentIndexTask.java @@ -24,16 +24,22 @@ import static de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryService.FIELD_SOURCE_DOC_COMPLETE; import static de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryService.FIELD_TEXT; import static de.tudarmstadt.ukp.inception.scheduling.TaskState.CANCELLED; +import static de.tudarmstadt.ukp.inception.scheduling.TaskState.COMPLETED; import static de.tudarmstadt.ukp.inception.scheduling.TaskState.RUNNING; +import static java.lang.Math.floorDiv; import static java.time.Duration.ofSeconds; -import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; +import static org.apache.commons.collections4.ListUtils.partition; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.apache.lucene.util.VectorUtil.l2normalize; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Objects; +import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.LongPoint; @@ -41,6 +47,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.uima.cas.CAS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -48,11 +55,13 @@ import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession; +import de.tudarmstadt.ukp.inception.assistant.config.AssistantProperties; import de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryServiceImpl.PooledIndex; import de.tudarmstadt.ukp.inception.documents.api.DocumentService; import de.tudarmstadt.ukp.inception.scheduling.DebouncingTask; import de.tudarmstadt.ukp.inception.scheduling.ProjectTask; import de.tudarmstadt.ukp.inception.scheduling.TaskScope; +import de.tudarmstadt.ukp.inception.support.logging.LogMessage; public class UpdateDocumentIndexTask extends DebouncingTask @@ -63,7 +72,9 @@ public class UpdateDocumentIndexTask public static final String TYPE = "UpdateDocumentIndexTask"; private @Autowired DocumentService documentService; - private @Autowired DocumentQueryServiceImpl indexService; + private @Autowired DocumentQueryServiceImpl documentQueryService; + private @Autowired EmbeddingService embeddingService; + private @Autowired AssistantProperties properties; public UpdateDocumentIndexTask(Builder> aBuilder) { @@ -85,7 +96,7 @@ public void execute() throws Exception } var monitor = getMonitor(); - try (var index = indexService.borrowIndex(getProject())) { + try (var index = documentQueryService.borrowIndex(getProject())) { try (var reader = DirectoryReader.open(index.getIndexWriter())) { var searcher = new IndexSearcher(reader); var query = new FieldExistsQuery(FIELD_SOURCE_DOC_COMPLETE); @@ -107,8 +118,9 @@ public void execute() throws Exception } } - monitor.setStateAndProgress(RUNNING, 0, - documentsToUnindex.size() + documentsToIndex.size()); + var toProcess = documentsToUnindex.size() + documentsToIndex.size() * 100; + var processed = 0; + monitor.setStateAndProgress(RUNNING, processed * 100, toProcess); for (var sourceDocumentId : documentsToUnindex) { if (monitor.isCancelled()) { @@ -116,8 +128,10 @@ public void execute() throws Exception break; } - monitor.incrementProgress(); + monitor.setProgressWithMessage(processed * 100, toProcess, + LogMessage.info(this, "Unindexing...")); unindexDocument(index, sourceDocumentId); + processed++; } try (var session = CasStorageSession.openNested()) { @@ -127,12 +141,15 @@ public void execute() throws Exception break; } + monitor.setProgressWithMessage(processed * 100, toProcess, + LogMessage.info(this, "Indexing: %s", sourceDocument.getName())); indexDocument(index, sourceDocument); - monitor.incrementProgress(); + processed++; } } if (!monitor.isCancelled()) { + monitor.setStateAndProgress(COMPLETED, processed * 100, toProcess); index.getIndexWriter().commit(); // We can probably live with a partial index, so we do not roll back if // cancelled @@ -161,20 +178,53 @@ private void indexDocument(PooledIndex aIndex, SourceDocument aSourceDocument) var cas = documentService.createOrReadInitialCas(aSourceDocument, AUTO_CAS_UPGRADE, SHARED_READ_ONLY_ACCESS); - for (var sentence : cas.select(Sentence.class)) { - var text = sentence.getCoveredText(); - var docEmbedding = indexService.getEmbedding(text); - var doc = new Document(); - doc.add(new KnnFloatVectorField(FIELD_EMBEDDING, docEmbedding, COSINE)); - doc.add(new StoredField(FIELD_TEXT, text)); - aIndex.getIndexWriter().addDocument(doc); + var chunks = chunk(cas); + var batches = partition(chunks, properties.getEmbedding().getBatchSize()); + + var totalBatches = batches.size(); + var processedBatches = 0; + var sentencesSeen = 0; + var progressOffset = getMonitor().getProgress(); + for (var batch : batches) { + try { + var docEmbeddings = embeddingService.embed(batch.toArray(String[]::new)); + for (var embedding : docEmbeddings) { + addToIndex(aIndex, embedding); + } + } + catch (IOException e) { + LOG.error("Error indexing document {} chunks {}-{} ", aSourceDocument, + sentencesSeen, sentencesSeen + batch.size(), e); + } + + getMonitor().setStateAndProgress(RUNNING, + progressOffset + floorDiv(processedBatches * 100, totalBatches)); + + sentencesSeen += batch.size(); + processedBatches++; } } catch (IOException e) { - LOG.error("Error indexing document {}", aSourceDocument, e); + LOG.error("Error indexing document", aSourceDocument, e); } } + private List chunk(CAS cas) + { + return cas.select(Sentence.class) // + .map(Sentence::getCoveredText) // + .toList(); + } + + private void addToIndex(PooledIndex aIndex, Pair embedding) throws IOException + { + var doc = new Document(); + var normalizedEmbedding = l2normalize(embedding.getValue(), false); + doc.add(new KnnFloatVectorField(FIELD_EMBEDDING, normalizedEmbedding, DOT_PRODUCT)); + doc.add(new StoredField(FIELD_TEXT, embedding.getKey())); + aIndex.getIndexWriter().addDocument(doc); + } + @Override public boolean equals(Object o) { diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.html b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.html index cfacea93948..252f3d21bf9 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.html +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.html @@ -22,6 +22,11 @@
+
+ +
diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.java index 689d36afead..434a1a116b5 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/sidebar/AssistantSidebar.java @@ -17,21 +17,35 @@ */ package de.tudarmstadt.ukp.inception.assistant.sidebar; +import org.apache.wicket.ajax.AjaxRequestTarget; +import org.apache.wicket.spring.injection.annot.SpringBean; + import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasProvider; import de.tudarmstadt.ukp.clarin.webanno.ui.annotation.AnnotationPageBase2; import de.tudarmstadt.ukp.clarin.webanno.ui.annotation.sidebar.AnnotationSidebar_ImplBase; +import de.tudarmstadt.ukp.inception.assistant.index.DocumentQueryService; import de.tudarmstadt.ukp.inception.editor.action.AnnotationActionHandler; +import de.tudarmstadt.ukp.inception.support.lambda.LambdaAjaxLink; public class AssistantSidebar extends AnnotationSidebar_ImplBase { private static final long serialVersionUID = -1585047099720374119L; + private @SpringBean DocumentQueryService documentQueryService; + public AssistantSidebar(String aId, AnnotationActionHandler aActionHandler, CasProvider aCasProvider, AnnotationPageBase2 aAnnotationPage) { super(aId, aActionHandler, aCasProvider, aAnnotationPage); - + add(new AssistantPanel("chat")); + + add(new LambdaAjaxLink("reindex", this::actionReindex)); + } + + private void actionReindex(AjaxRequestTarget aTarget) + { + documentQueryService.rebuildIndexAsync(getAnnotationPage().getProject()); } } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideIndexingTask.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideIndexingTask.java index 42326d55a89..1de95eb2d19 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideIndexingTask.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideIndexingTask.java @@ -21,11 +21,13 @@ import static de.tudarmstadt.ukp.inception.scheduling.TaskState.RUNNING; import static java.lang.System.currentTimeMillis; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.commons.collections4.ListUtils.partition; import java.lang.invoke.MethodHandles; import java.net.URL; import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -67,15 +69,18 @@ public void execute() throws Exception LOG.info( "Started building user guide index in the background... this may take a moment"); var startTime = currentTimeMillis(); - var blocks = userGuide.select(".i7n-assistant"); + var blocks = userGuide.select(".i7n-assistant").stream() // + .map(Element::text) // + .toList(); + var blockChunks = partition(blocks, 100); var monitor = getMonitor(); monitor.setStateAndProgress(RUNNING, 0, blocks.size()); var blocksIndexed = 0; - for (var block : blocks) { - blocksIndexed++; - documentationIndexingService.indexBlock(iw, block.text()); + for (var blockChunk : blockChunks) { + blocksIndexed += blockChunks.size(); + documentationIndexingService.indexBlocks(iw, blockChunk.toArray(String[]::new)); monitor.setProgress(blocksIndexed); } diff --git a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImpl.java b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImpl.java index 6b8c4c4c89f..7b5281cc1ef 100644 --- a/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImpl.java +++ b/inception/inception-assistant/src/main/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImpl.java @@ -21,7 +21,8 @@ import static java.nio.file.Files.readString; import static java.nio.file.Files.writeString; import static java.util.Collections.emptyList; -import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.apache.lucene.util.VectorUtil.l2normalize; import java.io.File; import java.io.IOException; @@ -48,9 +49,8 @@ import org.springframework.context.event.EventListener; import de.tudarmstadt.ukp.inception.assistant.config.AssistantProperties; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaEmbedRequest; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaOptions; +import de.tudarmstadt.ukp.inception.assistant.index.EmbeddingService; +import de.tudarmstadt.ukp.inception.assistant.index.HighDimensionLucene99Codec; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; import de.tudarmstadt.ukp.inception.support.SettingsUtil; @@ -62,20 +62,20 @@ public class UserGuideQueryServiceImpl private static final String FIELD_TEXT = "text"; private static final String FIELD_EMBEDDING = "field"; - private final OllamaClient ollamaClient; private final AssistantProperties properties; private final SchedulingService schedulingService; + private final EmbeddingService embeddingService; private Directory indexDir; private DirectoryReader indexReader; private volatile boolean destroyed = false; public UserGuideQueryServiceImpl(AssistantProperties aProperties, - SchedulingService aSchedulingService, OllamaClient aOllamaClient) + SchedulingService aSchedulingService, EmbeddingService aEmbeddingService) { properties = aProperties; schedulingService = aSchedulingService; - ollamaClient = aOllamaClient; + embeddingService = aEmbeddingService; } @EventListener @@ -161,7 +161,7 @@ public List query(String aQuery, int aTopN, double aScoreThreshold) try { var reader = getSharedIndexReader(); - var queryEmbedding = getEmbedding(aQuery); + var queryEmbedding = l2normalize(embeddingService.embed(aQuery), false); var searcher = new IndexSearcher(reader); var query = new KnnFloatVectorQuery(FIELD_EMBEDDING, queryEmbedding, aTopN); @@ -191,6 +191,7 @@ else if (LOG.isTraceEnabled()) { IndexWriter getIndexWriter() throws IOException { var iwc = new IndexWriterConfig(); + iwc.setCodec(new HighDimensionLucene99Codec(embeddingService.getDimension())); return new IndexWriter(getSharedIndexDirectory(), iwc); } @@ -212,31 +213,19 @@ synchronized Directory getSharedIndexDirectory() throws IOException return indexDir; } - void indexBlock(IndexWriter aWriter, String aText) throws IOException + void indexBlocks(IndexWriter aWriter, String... aText) throws IOException { if (destroyed) { return; } - var docEmbedding = getEmbedding(aText); - var doc = new Document(); - doc.add(new KnnFloatVectorField(FIELD_EMBEDDING, docEmbedding, COSINE)); - doc.add(new StoredField(FIELD_TEXT, aText)); - aWriter.addDocument(doc); - } - - float[] getEmbedding(String aText) throws IOException - { - var request = OllamaEmbedRequest.builder() // - .withModel(properties.getEmbedding().getModel()) // - .withInput(aText) // - .withOption(OllamaOptions.TEMPERATURE, properties.getEmbedding().getTemperature()) // - .withOption(OllamaOptions.SEED, properties.getEmbedding().getSeed()) // - .withOption(OllamaOptions.TOP_P, properties.getEmbedding().getTopP()) // - .withOption(OllamaOptions.TOP_K, properties.getEmbedding().getTopK()) // - .withOption(OllamaOptions.REPEAT_PENALTY, properties.getEmbedding().getRepeatPenalty()) // - .build(); - return ollamaClient.generateEmbeddings(properties.getUrl(), request)[0]; + var docEmbeddings = embeddingService.embed(aText); + for (var embedding : docEmbeddings) { + var doc = new Document(); + doc.add(new KnnFloatVectorField(FIELD_EMBEDDING, l2normalize(embedding.getValue(), false), DOT_PRODUCT)); + doc.add(new StoredField(FIELD_TEXT, embedding.getKey())); + aWriter.addDocument(doc); + } } void deleteIndex() diff --git a/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImplTest.java b/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImplTest.java index 5e75162f246..5deca78b6ea 100644 --- a/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImplTest.java +++ b/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/AssistantServiceImplTest.java @@ -67,9 +67,9 @@ import de.tudarmstadt.ukp.inception.documents.api.RepositoryProperties; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.log.config.EventLoggingAutoConfiguration; import de.tudarmstadt.ukp.inception.project.api.ProjectService; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; -import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClientImpl; +import de.tudarmstadt.ukp.inception.search.config.SearchServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.support.logging.Logging; import de.tudarmstadt.ukp.inception.support.spring.ApplicationContextProvider; import de.tudarmstadt.ukp.inception.support.test.websocket.WebSocketStompTestClient; @@ -86,7 +86,9 @@ "websocket.enabled=true"}) @SpringBootApplication( // exclude = { // - LiquibaseAutoConfiguration.class }) + LiquibaseAutoConfiguration.class, // + SearchServiceAutoConfiguration.class, // + EventLoggingAutoConfiguration.class }) @ImportAutoConfiguration({ // SecurityAutoConfiguration.class, // WebsocketAutoConfiguration.class, // @@ -238,11 +240,6 @@ public DaoAuthenticationProvider authenticationProvider(PasswordEncoder aEncoder return authProvider; } - @Bean - public OllamaClient ollamaClient() { - return new OllamaClientImpl(); - } - @Order(100) @Bean public SecurityFilterChain wsFilterChain(HttpSecurity aHttp) throws Exception diff --git a/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImplTest.java b/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImplTest.java index e0968629b25..6c284bad9e7 100644 --- a/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImplTest.java +++ b/inception/inception-assistant/src/test/java/de/tudarmstadt/ukp/inception/assistant/userguide/UserGuideQueryServiceImplTest.java @@ -37,6 +37,7 @@ import de.tudarmstadt.ukp.inception.assistant.config.AssistantProperties; import de.tudarmstadt.ukp.inception.assistant.config.AssistantPropertiesImpl; +import de.tudarmstadt.ukp.inception.assistant.index.EmbeddingServiceImpl; import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClientImpl; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; @@ -51,6 +52,7 @@ class UserGuideQueryServiceImplTest private AssistantProperties assistantProperties; private OllamaClient ollamaClient; private UserGuideQueryServiceImpl sut; + private EmbeddingServiceImpl embeddingService; private static @TempDir Path applicationHome; @@ -66,7 +68,8 @@ void setup() { assistantProperties = new AssistantPropertiesImpl(); ollamaClient = new OllamaClientImpl(); - sut = new UserGuideQueryServiceImpl(assistantProperties, schedulingService, ollamaClient); + embeddingService = new EmbeddingServiceImpl(assistantProperties, ollamaClient); + sut = new UserGuideQueryServiceImpl(assistantProperties, schedulingService, embeddingService); } @AfterEach @@ -79,9 +82,10 @@ void tearDown() void testSimpleIndexAndQuery() throws Exception { try (var iw = sut.getIndexWriter()) { - sut.indexBlock(iw, "Waldi is a dog."); - sut.indexBlock(iw, "Miau is a cat."); - sut.indexBlock(iw, "Tweety is a bird."); + sut.indexBlocks(iw, // + "Waldi is a dog.", // + "Miau is a cat.", // + "Tweety is a bird."); } sut.markIndexUpToDate(); diff --git a/inception/inception-assistant/src/test/resources/log4j2-test.xml b/inception/inception-assistant/src/test/resources/log4j2-test.xml index ccb35bc231a..490493a85cd 100644 --- a/inception/inception-assistant/src/test/resources/log4j2-test.xml +++ b/inception/inception-assistant/src/test/resources/log4j2-test.xml @@ -17,7 +17,7 @@ - + diff --git a/inception/inception-documents/pom.xml b/inception/inception-documents/pom.xml index 02c2e7f4b08..5ccae8ae774 100644 --- a/inception/inception-documents/pom.xml +++ b/inception/inception-documents/pom.xml @@ -78,6 +78,10 @@ de.tudarmstadt.ukp.inception.app inception-security + + de.tudarmstadt.ukp.inception.app + inception-workload + diff --git a/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/DocumentStateWatcher.java b/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/DocumentStateWatcher.java index ae03af97524..268b1502946 100644 --- a/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/DocumentStateWatcher.java +++ b/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/DocumentStateWatcher.java @@ -25,6 +25,7 @@ import de.tudarmstadt.ukp.inception.documents.event.BeforeDocumentRemovedEvent; import de.tudarmstadt.ukp.inception.documents.event.DocumentStateChangedEvent; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; +import de.tudarmstadt.ukp.inception.workload.task.UpdateProjectStateTask; /** * Watches the document state in of projects using matrix workload. diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClient.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClient.java index 1e8379c1b3d..ea8b0804476 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClient.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClient.java @@ -21,6 +21,8 @@ import java.util.List; import java.util.function.Consumer; +import org.apache.commons.lang3.tuple.Pair; + public interface OllamaClient { String generate(String aUrl, OllamaGenerateRequest aRequest) throws IOException; @@ -34,5 +36,6 @@ String generate(String aUrl, OllamaChatRequest aRequest, Consumer listModels(String aUrl) throws IOException; - float[][] generateEmbeddings(String aUrl, OllamaEmbedRequest aRequest) throws IOException; + List> embed(String aUrl, OllamaEmbedRequest aRequest) + throws IOException; } diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClientImpl.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClientImpl.java index cccdf83aeca..a53aa535693 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClientImpl.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaClientImpl.java @@ -31,10 +31,12 @@ import java.net.http.HttpRequest.BodyPublishers; import java.net.http.HttpResponse; import java.time.Duration; +import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,11 +74,7 @@ public OllamaClientImpl(HttpClient aClient, OllamaMetrics aMetrics) protected HttpResponse sendRequest(HttpRequest aRequest) throws IOException { try { - var response = client.send(aRequest, HttpResponse.BodyHandlers.ofInputStream()); - - handleError(response); - - return response; + return client.send(aRequest, HttpResponse.BodyHandlers.ofInputStream()); } catch (IOException | InterruptedException e) { throw new IOException("Error while sending request: " + e.getMessage(), e); @@ -175,7 +173,8 @@ public String generate(String aUrl, OllamaChatRequest aRequest, } @Override - public float[][] generateEmbeddings(String aUrl, OllamaEmbedRequest aRequest) throws IOException + public List> embed(String aUrl, OllamaEmbedRequest aRequest) + throws IOException { var request = HttpRequest.newBuilder() // .uri(URI.create(appendIfMissing(aUrl, "/") + "api/embed")) // @@ -185,6 +184,10 @@ public float[][] generateEmbeddings(String aUrl, OllamaEmbedRequest aRequest) th var rawResponse = sendRequest(request); + if (aRequest.input().size() == 1 && rawResponse.statusCode() >= HTTP_BAD_REQUEST) { + LOG.error("Error embedding string [{}]", aRequest.input().get(0)); + } + handleError(rawResponse); try (var is = rawResponse.body()) { @@ -192,7 +195,12 @@ public float[][] generateEmbeddings(String aUrl, OllamaEmbedRequest aRequest) th collectMetrics(response); - return response.embeddings(); + var result = new ArrayList>(); + for (int i = 0; i < response.embeddings().length; i++) { + result.add(Pair.of(aRequest.input().get(i), response.embeddings()[i])); + } + + return result; } } @@ -222,17 +230,17 @@ private void collectMetrics(OllamaTokenMetrics response) private void collectMetrics(OllamaEmbedResponse response) { if (LOG.isDebugEnabled()) { - var loadDuration = response.loadDuration() / 1_000_000_000; - var totalDuration = response.totalDuration() / 1_000_000_000; - var evalDuration = (response.totalDuration() - response.loadDuration()) / 1_000_000_000; - var promptEvalTokenPerSecond = evalDuration > 0 - ? response.promptEvalCount() / evalDuration + var loadDurationMs = response.loadDuration() / 1_000_000; + var evalDurationMs = (response.totalDuration() - response.loadDuration()) / 1_000_000d; + var promptEvalTokenPerSecond = evalDurationMs > 0 + ? (double) response.promptEvalCount() / evalDurationMs * 1000.0 : 0; + var totalDurationMs = response.totalDuration() / 1_000_000d; LOG.debug("Tokens - prompt: {} ({} per sec)", // response.promptEvalCount(), // - promptEvalTokenPerSecond); - LOG.debug("Timings - load: {}sec response: {}s total: {}sec", // - loadDuration, evalDuration, totalDuration); + format("%.2f", promptEvalTokenPerSecond)); + LOG.debug("Timings - load: {}ms response: {}ms total: {}ms", // + loadDurationMs, evalDurationMs, totalDurationMs); } } diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedRequest.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedRequest.java index ebdea25601e..466107179c0 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedRequest.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedRequest.java @@ -31,13 +31,13 @@ import de.tudarmstadt.ukp.inception.recommendation.imls.llm.support.traits.Option; @JsonIgnoreProperties(ignoreUnknown = true) -public record OllamaEmbedRequest(String model, List input, +public record OllamaEmbedRequest(String model, List input, boolean truncate, @JsonInclude(Include.NON_EMPTY) Map options) { private OllamaEmbedRequest(Builder builder) { - this(builder.model, builder.input, builder.options); + this(builder.model, builder.input, builder.truncate, builder.options); } public static Builder builder() @@ -48,6 +48,7 @@ public static Builder builder() public static final class Builder { private String model; + private boolean truncate = true; private List input = new ArrayList<>(); private Map options = new HashMap<>(); diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedResponse.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedResponse.java index 3c8e6635ed8..9b2e99a3893 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedResponse.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaEmbedResponse.java @@ -17,12 +17,15 @@ */ package de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +@JsonIgnoreProperties(ignoreUnknown = true) public record OllamaEmbedResponse( // String model, // float[][] embeddings, // @JsonProperty("total_duration") long totalDuration, // @JsonProperty("load_duration") long loadDuration, // - @JsonProperty("prompt_eval_count") long promptEvalCount) -{} + @JsonProperty("prompt_eval_count") long promptEvalCount +// @JsonAnySetter Map additionalFields +) {} diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaMetricsImpl.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaMetricsImpl.java index 23a6bdbdeec..353ce1a1c75 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaMetricsImpl.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/client/OllamaMetricsImpl.java @@ -22,10 +22,8 @@ import org.springframework.jmx.export.annotation.ManagedMetric; import org.springframework.jmx.export.annotation.ManagedOperation; import org.springframework.jmx.export.annotation.ManagedResource; -import org.springframework.stereotype.Component; @ManagedResource -@Component public class OllamaMetricsImpl implements OllamaMetrics { diff --git a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/config/OllamaRecommenderAutoConfiguration.java b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/config/OllamaRecommenderAutoConfiguration.java index 958d2b23130..c0dc5915ed5 100644 --- a/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/config/OllamaRecommenderAutoConfiguration.java +++ b/inception/inception-imls-ollama/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/llm/ollama/config/OllamaRecommenderAutoConfiguration.java @@ -27,18 +27,27 @@ import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClient; import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaClientImpl; import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaMetrics; +import de.tudarmstadt.ukp.inception.recommendation.imls.llm.ollama.client.OllamaMetricsImpl; import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; @Configuration -@ConditionalOnProperty(prefix = "recommender.ollama", name = "enabled", havingValue = "true", matchIfMissing = false) public class OllamaRecommenderAutoConfiguration { + // We need this for the assistant atm, so it's not covered by the feature flag @Bean public OllamaClient ollamaClient(OllamaMetrics aMetrics) { return new OllamaClientImpl(HttpClient.newBuilder().build(), aMetrics); } + @Bean + public OllamaMetrics ollamaMetrics() + { + return new OllamaMetricsImpl(); + } + + @ConditionalOnProperty(prefix = "recommender.ollama", name = "enabled", havingValue = "true", // + matchIfMissing = false) @Bean public OllamaRecommenderFactory ollamaRecommenderFactory(OllamaClient aClient, AnnotationSchemaService aSchemaService) diff --git a/inception/inception-project-export/pom.xml b/inception/inception-project-export/pom.xml index 98ec5835dad..544e05a9f27 100644 --- a/inception/inception-project-export/pom.xml +++ b/inception/inception-project-export/pom.xml @@ -98,6 +98,10 @@ de.tudarmstadt.ukp.inception.app inception-ui-core + + de.tudarmstadt.ukp.inception.app + inception-scheduling + org.apache.wicket diff --git a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/ProjectExportServiceImpl.java b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/ProjectExportServiceImpl.java index 855f0695562..0124da3b1f3 100644 --- a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/ProjectExportServiceImpl.java +++ b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/ProjectExportServiceImpl.java @@ -31,9 +31,9 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.lang.invoke.MethodHandles; import java.time.Duration; import java.util.ArrayList; -import java.util.Deque; import java.util.Enumeration; import java.util.HashSet; import java.util.LinkedList; @@ -82,6 +82,7 @@ import de.tudarmstadt.ukp.inception.project.export.task.backup.BackupProjectExportTask; import de.tudarmstadt.ukp.inception.project.export.task.curated.CuratedDocumentsProjectExportRequest; import de.tudarmstadt.ukp.inception.project.export.task.curated.CuratedDocumentsProjectExportTask; +import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; import de.tudarmstadt.ukp.inception.support.json.JSONUtil; import de.tudarmstadt.ukp.inception.support.logging.BaseLoggers; import de.tudarmstadt.ukp.inception.support.logging.LogMessage; @@ -97,7 +98,7 @@ public class ProjectExportServiceImpl { public static final String EXPORTED_PROJECT = "exportedproject"; - private final Logger log = LoggerFactory.getLogger(getClass()); + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Duration CLEANUP_INTERVAL = Duration.ofMinutes(5); private static final Duration STALE_EXPORT_EXPIRY = Duration.ofMinutes(30); @@ -107,6 +108,7 @@ public class ProjectExportServiceImpl private final ExecutorService taskExecutorService; private final ScheduledExecutorService cleaningScheduler; private final ApplicationContext applicationContext; + private final SchedulingService schedulingService; private final List exportersProxy; private List exporters; @@ -114,11 +116,12 @@ public class ProjectExportServiceImpl @Autowired public ProjectExportServiceImpl(ApplicationContext aApplicationContext, @Lazy @Autowired(required = false) List aExporters, - @Autowired ProjectService aProjectService) + @Autowired ProjectService aProjectService, SchedulingService aSchedulingService) { applicationContext = aApplicationContext; exportersProxy = aExporters; projectService = aProjectService; + schedulingService = aSchedulingService; taskExecutorService = Executors.newFixedThreadPool(4); @@ -151,7 +154,7 @@ public void onContextRefreshedEvent(ContextRefreshedEvent aEvent) Set> exporterClasses = new HashSet<>(); for (ProjectExporter init : exps) { if (exporterClasses.add(init.getClass())) { - log.debug("Found project exporter: {}", + LOG.debug("Found project exporter: {}", ClassUtils.getAbbreviatedName(init.getClass(), 20)); } else { @@ -215,7 +218,7 @@ public void exportProject(FullProjectExportRequest aRequest, ProjectExportTaskMo catch (IOException e) { aMonitor.addMessage(LogMessage.error(this, "Unable to delete temporary export directory [%s]", exportTempDir)); - log.error("Unable to delete temporary export directory [{}]", exportTempDir); + LOG.error("Unable to delete temporary export directory [{}]", exportTempDir); } } } @@ -244,10 +247,12 @@ private ExportedProject exportProjectToZip(FullProjectExportRequest aRequest, var exportersSeen = new HashSet>(); Set exportersDeferred = SetUtils.newIdentityHashSet(); - ExportedProject exProject = new ExportedProject(); + var exProject = new ExportedProject(); exProject.setName(aRequest.getProject().getName()); exProject.setSlug(aRequest.getProject().getSlug()); + // Exporting without suspension is probably ok + // try (var ctx = schedulingService.whileSuspended(aRequest.getProject())) { try { while (!deque.isEmpty()) { var exporter = deque.pop(); @@ -258,13 +263,13 @@ private ExportedProject exportProjectToZip(FullProjectExportRequest aRequest, } if (exportersSeen.containsAll(exporter.getExportDependencies())) { - log.debug("Applying project exporter: {}", exporter); + LOG.debug("Applying project exporter: {}", exporter); exporter.exportData(aRequest, aMonitor, exProject, aZip); exportersSeen.add(exporter.getClass()); exportersDeferred.clear(); } else { - log.debug( + LOG.debug( "Deferring project exporter as dependencies are not yet fulfilled: [{}]", exporter); deque.add(exporter); @@ -294,19 +299,19 @@ public Project importProject(ProjectImportRequest aRequest, ZipFile aZip) { long start = currentTimeMillis(); - Deque deque = new LinkedList<>(exporters); - Set> initsSeen = new HashSet<>(); + var deque = new LinkedList(exporters); + var initsSeen = new HashSet>(); Set initsDeferred = SetUtils.newIdentityHashSet(); - Project project = new Project(); + var project = new Project(); - try { - ExportedProject exProject = loadExportedProject(aZip); + try (var ctx = schedulingService.whileSuspended(project)) { + var exProject = loadExportedProject(aZip); project.setName(exProject.getName()); // Old projects do not have a slug, so we derive one from the project name - String slug = exProject.getSlug(); + var slug = exProject.getSlug(); if (isBlank(slug)) { slug = projectService.deriveSlugFromName(exProject.getName()); } @@ -319,7 +324,7 @@ public Project importProject(ProjectImportRequest aRequest, ZipFile aZip) // Apply the importers while (!deque.isEmpty()) { - ProjectExporter importer = deque.pop(); + var importer = deque.pop(); if (initsDeferred.contains(importer)) { throw new IllegalStateException("Circular initializer dependencies in " @@ -327,13 +332,13 @@ public Project importProject(ProjectImportRequest aRequest, ZipFile aZip) } if (initsSeen.containsAll(importer.getImportDependencies())) { - log.debug("Applying project importer: {}", importer); + LOG.debug("Applying project importer: {}", importer); importer.importData(aRequest, project, exProject, aZip); initsSeen.add(importer.getClass()); initsDeferred.clear(); } else { - log.debug( + LOG.debug( "Deferring project exporter as dependencies are not yet fulfilled: [{}]", importer); deque.add(importer); @@ -345,7 +350,7 @@ public Project importProject(ProjectImportRequest aRequest, ZipFile aZip) throw new ProjectExportException("Project import failed", e); } - log.info("Imported project [{}]({}) ({})", project.getName(), project.getId(), + LOG.info("Imported project {} ({})", project, formatDurationWords(currentTimeMillis() - start, true, true)); return project; @@ -462,20 +467,20 @@ public boolean cancelTask(ProjectExportTaskHandle aHandle) boolean cancelled = task.future.cancel(true); if (cancelled) { - log.debug("Cancelled running export"); + LOG.debug("Cancelled running export"); } else { - log.debug("Cancelled completed export"); + LOG.debug("Cancelled completed export"); } File exportedFile = task.task.getMonitor().getExportedFile(); if (exportedFile != null && exportedFile.exists()) { - log.debug("Deleted exported file {}", exportedFile); + LOG.debug("Deleted exported file {}", exportedFile); try { FileUtils.forceDelete(exportedFile); } catch (IOException ex) { - log.error("Unable to clean up cancelled exported file [{}]:", exportedFile, ex); + LOG.error("Unable to clean up cancelled exported file [{}]:", exportedFile, ex); } } @@ -498,7 +503,7 @@ private void cleanUp() // Remove task info from the tasks map one hour after completion/failure/etc. long age = System.currentTimeMillis() - monitor.getEndTime(); if (age > STALE_EXPORT_EXPIRY.toMillis()) { - log.info("Cleaning up stale export task for project [{}]:", + LOG.info("Cleaning up stale export task for project [{}]:", e.getValue().task.getRequest().getProject().getName()); tasks.remove(e.getKey()); e.getValue().task.destroy(); @@ -508,7 +513,7 @@ private void cleanUp() FileUtils.forceDelete(exportedFile); } catch (IOException ex) { - log.error("Unable to clean up stale exported file [{}]:", exportedFile, ex); + LOG.error("Unable to clean up stale exported file [{}]:", exportedFile, ex); } } } diff --git a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/config/ProjectExportServiceAutoConfiguration.java b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/config/ProjectExportServiceAutoConfiguration.java index 17aa1b08630..abd5942ec6e 100644 --- a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/config/ProjectExportServiceAutoConfiguration.java +++ b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/config/ProjectExportServiceAutoConfiguration.java @@ -40,6 +40,7 @@ import de.tudarmstadt.ukp.inception.project.export.settings.ExportProjectSettingsPanelFactory; import de.tudarmstadt.ukp.inception.project.export.task.backup.BackupProjectExportExtension; import de.tudarmstadt.ukp.inception.project.export.task.curated.CuratedDocumentsProjectExportExtension; +import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; @Configuration @AutoConfigureAfter(name = { @@ -50,9 +51,10 @@ public class ProjectExportServiceAutoConfiguration @Bean public ProjectExportService projectExportService(ApplicationContext aApplicationContext, @Lazy @Autowired(required = false) List aExporters, - ProjectService aProjectService) + ProjectService aProjectService, SchedulingService aSchedulingService) { - return new ProjectExportServiceImpl(aApplicationContext, aExporters, aProjectService); + return new ProjectExportServiceImpl(aApplicationContext, aExporters, aProjectService, + aSchedulingService); } @ConditionalOnProperty(name = "dashboard.legacy-export", havingValue = "false", matchIfMissing = true) diff --git a/inception/inception-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/project/ProjectServiceImpl.java b/inception/inception-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/project/ProjectServiceImpl.java index 1c3c90b0105..fcc08e3b6e2 100644 --- a/inception/inception-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/project/ProjectServiceImpl.java +++ b/inception/inception-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/project/ProjectServiceImpl.java @@ -44,6 +44,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.lang.invoke.MethodHandles; import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Collection; @@ -113,7 +114,7 @@ public class ProjectServiceImpl implements ProjectService { - private final Logger log = LoggerFactory.getLogger(getClass()); + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final EntityManager entityManager; private final UserDao userRepository; @@ -149,7 +150,7 @@ public Project createProject(Project aProject) throws IOException entityManager.persist(aProject); try (var logCtx = withProjectLogger(aProject)) { - log.info("Created project [{}]({})", aProject.getName(), aProject.getId()); + LOG.info("Created project {}", aProject); String path = repositoryProperties.getPath().getAbsolutePath() + "/" + PROJECT_FOLDER + "/" + aProject.getId(); @@ -176,7 +177,7 @@ public void createProjectPermission(ProjectPermission aPermission) try (var logCtx = withProjectLogger(aPermission.getProject())) { entityManager.persist(aPermission); - log.info("Created permission [{}] for user [{}] on project {}", aPermission.getLevel(), + LOG.info("Created permission [{}] for user [{}] on project {}", aPermission.getLevel(), aPermission.getUser(), aPermission.getProject()); applicationEventPublisher.publishEvent(new ProjectPermissionsChangedEvent(this, @@ -498,8 +499,8 @@ public void setProjectPermissionLevels(String aUser, Project aProject, Collection aLevels) { try (var logCtx = withProjectLogger(aProject)) { - Set levelsToBeGranted = new HashSet<>(aLevels); - List permissions = new ArrayList<>(); + var levelsToBeGranted = new HashSet(aLevels); + var permissions = new ArrayList(); try { permissions.addAll(listProjectPermissionLevel(aUser, aProject)); } @@ -508,14 +509,14 @@ public void setProjectPermissionLevels(String aUser, Project aProject, } // Remove permissions that no longer exist - List revokedPermissions = new ArrayList<>(); - for (ProjectPermission permission : permissions) { + var revokedPermissions = new ArrayList(); + for (var permission : permissions) { if (!aLevels.contains(permission.getLevel())) { revokedPermissions.add(permission); entityManager.remove(permission); - log.info("Removed permission [{}] for user [{}] on project {}", + LOG.info("Removed permission [{}] for user [{}] on project {}", permission.getLevel(), permission.getUser(), permission.getProject()); } else { @@ -524,15 +525,15 @@ public void setProjectPermissionLevels(String aUser, Project aProject, } // Grant new permissions - List grantedPermissions = new ArrayList<>(); - for (PermissionLevel level : levelsToBeGranted) { - ProjectPermission permission = new ProjectPermission(aProject, aUser, level); + var grantedPermissions = new ArrayList(); + for (var level : levelsToBeGranted) { + var permission = new ProjectPermission(aProject, aUser, level); grantedPermissions.add(permission); entityManager.persist(permission); - log.info("Granted permission [{}] for user {} on project {}", level, aUser, + LOG.info("Granted permission [{}] for user {} on project {}", level, aUser, aProject); } @@ -687,12 +688,12 @@ public void removeProject(Project aProject) throws IOException FastIOUtils.delete(new File(path)); } catch (FileNotFoundException | NoSuchFileException e) { - log.info("Project directory to be deleted was not found: [{}]. Ignoring.", path); + LOG.info("Project directory to be deleted was not found: [{}]. Ignoring.", path); } applicationEventPublisher.publishEvent(new AfterProjectRemovedEvent(this, aProject)); - log.info("Removed project {} ({})", aProject, + LOG.info("Removed project {} ({})", aProject, formatDurationWords(System.currentTimeMillis() - start, true, true)); } } @@ -705,7 +706,7 @@ public void removeProjectPermission(ProjectPermission aPermission) try (var logCtx = withProjectLogger(aPermission.getProject())) { entityManager.remove(aPermission); - log.info("Removed permission [{}] for user [{}] on project {}", aPermission.getLevel(), + LOG.info("Removed permission [{}] for user [{}] on project {}", aPermission.getLevel(), aPermission.getUser(), aPermission.getProject()); applicationEventPublisher.publishEvent(new ProjectPermissionsChangedEvent(this, @@ -814,7 +815,7 @@ public void onContextRefreshedEvent(ContextRefreshedEvent aEvent) Set> initializerClasses = new HashSet<>(); for (ProjectInitializer init : inits) { if (initializerClasses.add(init.getClass())) { - log.debug("Found project initializer: {}", + LOG.debug("Found project initializer: {}", ClassUtils.getAbbreviatedName(init.getClass(), 20)); } else { @@ -837,13 +838,13 @@ private void addMissingProjectSlugs() for (Project project : projects) { String slug = deriveSlugFromName(project.getName()); if (!isValidProjectSlug(slug)) { - log.warn("Attempt to derive slug from project name [{}] resulted in invalid slug " + LOG.warn("Attempt to derive slug from project name [{}] resulted in invalid slug " + "[{}], generating random slug...", project.getName(), slug); slug = generateRandomSlug(); } slug = deriveUniqueSlug(slug); project.setSlug(slug); - log.info("Auto-generated slug [{}] for project {}", slug, project); + LOG.info("Auto-generated slug [{}] for project {}", slug, project); } } @@ -925,7 +926,7 @@ public void initializeProject(ProjectInitializationRequest aRequest, var initializerName = initializer.getName(); if (applied.contains(initializer.getClass())) { - log.debug("Skipping project initializer that was already applied: [{}]", + LOG.debug("Skipping project initializer that was already applied: [{}]", initializerName); continue; } @@ -943,13 +944,13 @@ public void initializeProject(ProjectInitializationRequest aRequest, } if (applied.containsAll(initializer.getDependencies())) { - log.debug("Applying project initializer: [{}]", initializerName); + LOG.debug("Applying project initializer: [{}]", initializerName); initializer.configure(aRequest); applied.add(initializer.getClass()); initsDeferred.clear(); } else { - log.debug("Deferring project initializer as dependencies are not yet fulfilled: {}", + LOG.debug("Deferring project initializer as dependencies are not yet fulfilled: {}", initializer); toApply.add(initializer); initsDeferred.add(initializer); diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/MatchResult.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/MatchResult.java index 7c874788caa..6e5f8dafbdf 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/MatchResult.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/MatchResult.java @@ -31,7 +31,7 @@ public enum MatchResult UNQUEUE_EXISTING_AND_QUEUE_THIS, /** - * Discard the incoming task if it matches an already enqueued task. If and matching task is + * Discard the incoming task if it matches an already enqueued task. If a matching task is * already scheduled or running, then queue the incoming task. */ DISCARD_OR_QUEUE_THIS; diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/NotifyingTaskMonitor.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/NotifyingTaskMonitor.java index 39029250233..d13e4417d63 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/NotifyingTaskMonitor.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/NotifyingTaskMonitor.java @@ -85,9 +85,7 @@ public synchronized void setStateAndProgress(TaskState aState, int aProgress, in protected void onDestroy() { var msg = new MTaskStateUpdate(this, true); - if (getUser() != null) { - schedulerWebsocketController.dispatch(msg); - } + schedulerWebsocketController.dispatch(msg); } protected void sendNotification() @@ -98,9 +96,7 @@ protected void sendNotification() var msg = new MTaskStateUpdate(this); if (lastUpdate == null || !lastUpdate.equals(msg)) { - if (getUser() != null) { - schedulerWebsocketController.dispatch(msg); - } + schedulerWebsocketController.dispatch(msg); lastUpdate = msg; } } diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingService.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingService.java index 57a19c9a409..e01ffdde97f 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingService.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingService.java @@ -19,6 +19,7 @@ import java.util.List; import java.util.Optional; +import java.util.concurrent.TimeoutException; import java.util.function.Predicate; import de.tudarmstadt.ukp.clarin.webanno.model.Project; @@ -92,4 +93,17 @@ public interface SchedulingService * the task to be executed. */ void executeSync(Task aTask); + + void suspendTasks(Project aProject) throws TimeoutException; + + void resumeTasks(Project aProject); + + SuspensionContext whileSuspended(Project aProject) throws TimeoutException; + + interface SuspensionContext + extends AutoCloseable + { + @Override + void close(); + } } diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingServiceImpl.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingServiceImpl.java index bca81b7b040..ac1f127b821 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingServiceImpl.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/SchedulingServiceImpl.java @@ -19,22 +19,28 @@ import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.NO_MATCH; import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.UNQUEUE_EXISTING_AND_QUEUE_THIS; +import static java.lang.System.currentTimeMillis; +import static java.lang.System.identityHashCode; import static java.util.concurrent.TimeUnit.SECONDS; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import org.apache.commons.lang3.Validate; @@ -76,6 +82,7 @@ public class SchedulingServiceImpl private final List enqueuedTasks; private final List pendingAcknowledgement; private final Set deletionPending; + private final Map suspended; @Autowired public SchedulingServiceImpl(ApplicationContext aApplicationContext, @@ -89,6 +96,7 @@ public SchedulingServiceImpl(ApplicationContext aApplicationContext, enqueuedTasks = Collections.synchronizedList(new ArrayList<>()); pendingAcknowledgement = Collections.synchronizedList(new ArrayList<>()); deletionPending = Collections.synchronizedSet(new LinkedHashSet<>()); + suspended = Collections.synchronizedMap(new LinkedHashMap<>()); watchdog = Executors.newScheduledThreadPool(1); watchdog.scheduleAtFixedRate(this::scheduleEligibleTasks, 5, 5, SECONDS); watchdog.scheduleAtFixedRate(this::cleanUpTasks, 10, 10, SECONDS); @@ -97,7 +105,14 @@ public SchedulingServiceImpl(ApplicationContext aApplicationContext, private void beforeExecute(Thread aThread, Runnable aRunnable) { Validate.notNull(aRunnable, "Task cannot be null"); - runningTasks.add((Task) aRunnable); + synchronized (runningTasks) { + if (!runningTasks.contains((Task) aRunnable)) { + runningTasks.add((Task) aRunnable); + } + else { + LOG.warn("Task running: {}", aRunnable); + } + } LOG.debug("Starting task: {} ", aRunnable); } @@ -115,7 +130,10 @@ private void afterExecute(Runnable aRunnable, Throwable aThrowable) private void handleTaskEnded(Task aTask) { - runningTasks.remove(aTask); + synchronized (runningTasks) { + runningTasks.remove(aTask); + } + if (aTask.getMonitor().isCancelled() || !aTask.getScope().isDestroyOnEnd()) { pendingAcknowledgement.add(aTask); } @@ -233,7 +251,7 @@ public synchronized void enqueue(Task aTask) } } - for (Task taskToUnqueue : tasksToUnqueue) { + for (var taskToUnqueue : tasksToUnqueue) { LOG.debug("Matching task already queued - unqueuing exsting: [{}] in favor of " + "incoming [{}]", taskToUnqueue, aTask); enqueuedTasks.remove(taskToUnqueue); @@ -257,11 +275,83 @@ public synchronized void enqueue(Task aTask) return; } + if (aTask.getProject() != null && suspended.containsKey(aTask.getProject())) { + LOG.debug("Tasks for project suspended - adding to queue: [{}]", aTask); + enqueuedTasks.add(aTask); + return; + } + schedule(aTask); logState(); } + @Override + public void suspendTasks(Project aProject) throws TimeoutException + { + synchronized (suspended) { + var suspendCount = suspended.computeIfAbsent(aProject, v -> new AtomicInteger(0)); + suspendCount.incrementAndGet(); + LOG.debug("Suspending tasks for {} [{}] [{}]", aProject, identityHashCode(aProject), + suspendCount); + } + + var startTime = currentTimeMillis(); + var timeout = Duration.ofMinutes(1); + var timeoutMillis = timeout.toMillis(); + + while (runningTasks.stream().anyMatch(t -> aProject.equals(t.getProject()))) { + LOG.trace( + "Waiting for running tasks to end before proceeding with suspension on project {}", + aProject); + + if (currentTimeMillis() - startTime > timeoutMillis) { + resumeTasks(aProject); + throw new TimeoutException( + "Waiting for tasks to finish took longer than " + timeout); + } + + try { + Thread.sleep(100); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Thread was interrupted while waiting for tasks to end.", e); + } + } + } + + @Override + public void resumeTasks(Project aProject) + { + synchronized (suspended) { + var suspendCount = suspended.get(aProject); + if (suspendCount != null) { + LOG.debug("Resuming tasks for {} [{}] [{}]", aProject, identityHashCode(aProject), + suspendCount); + if (suspendCount.decrementAndGet() == 0) { + suspended.remove(aProject); + scheduleEligibleTasks(); + } + } + } + } + + @Override + public SuspensionContext whileSuspended(Project aProject) throws TimeoutException + { + suspendTasks(aProject); + + return new SuspensionContext() + { + @Override + public void close() + { + resumeTasks(aProject); + } + }; + } + private MatchResult matchTask(Task aTask, Task aEnqueueTask) { if (aTask instanceof MatchableTask task) { @@ -339,14 +429,24 @@ private synchronized void cleanUpTasks() private synchronized void scheduleEligibleTasks() { - Iterator i = enqueuedTasks.iterator(); + var i = enqueuedTasks.iterator(); while (i.hasNext()) { - Task t = i.next(); - if (!getScheduledAndRunningTasks().contains(t) && t.isReadyToStart()) { - i.remove(); - schedule(t); + var t = i.next(); + if (!t.isReadyToStart()) { + continue; + } + + if (getScheduledAndRunningTasks().contains(t)) { + continue; } + + if (t.getProject() != null && suspended.containsKey(t.getProject())) { + continue; + } + + i.remove(); + schedule(t); } logState(); @@ -530,10 +630,19 @@ public void destroy() private void logState() { - getEnqueuedTasks().forEach(t -> LOG.debug("Queued : {}", t)); - getScheduledTasks().forEach(t -> LOG.debug("Scheduled : {}", t)); - getRunningTasks().forEach(t -> LOG.debug("Running : {}", t)); - getTasksPendingAcknowledgment().forEach(t -> LOG.trace("Pending ack : {}", t)); + if (LOG.isDebugEnabled()) { + var lines = new ArrayList(); + getEnqueuedTasks().forEach(t -> lines.add(" Queued : " + t)); + getScheduledTasks().forEach(t -> lines.add(" Scheduled : " + t)); + getRunningTasks().forEach(t -> lines.add(" Running : " + t)); + suspended.forEach((p, c) -> lines.add(" Suspended : " + p + " [" + c + "]")); + deletionPending.forEach(p -> lines.add(" Deleting : " + p)); + getTasksPendingAcknowledgment().forEach(t -> lines.add(" Pending ack : " + t)); + if (!lines.isEmpty()) { + LOG.debug("Scheduler state:"); + lines.forEach(LOG::debug); + } + } } @Override diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java index b43ab6adac0..6bc26779059 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java @@ -90,8 +90,8 @@ public void afterPropertiesSet() { if (monitor == null) { // For tasks that have a parent task, we use a non-notifying monitor. Also, we do not - // report such subtasks ia the SchedulerControllerImpl - they are internal. - if (schedulerController != null && sessionOwner != null && parentTask == null) { + // report such subtasks via the SchedulerControllerImpl - they are internal. + if (schedulerController != null && parentTask == null) { monitor = new NotifyingTaskMonitor(this, schedulerController); } else { @@ -196,14 +196,21 @@ public final void run() } } - public void runSync() + public final void runSync() { try { monitor.setState(TaskState.RUNNING); + execute(); + if (monitor.getState() == TaskState.RUNNING) { monitor.setState(TaskState.COMPLETED); } + + if (monitor.getState() == TaskState.COMPLETED) { + LOG.debug("Task [{}] completed (trigger: [{}]) in {}", getTitle(), getTrigger(), + monitor.getDuration()); + } } catch (Exception e) { monitor.addMessage(LogMessage.error(this, "Task failed.")); diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/TaskMonitor.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/TaskMonitor.java index c3d0baa8c5c..69e16ce5e31 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/TaskMonitor.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/TaskMonitor.java @@ -21,6 +21,7 @@ import static de.tudarmstadt.ukp.inception.scheduling.TaskState.COMPLETED; import static de.tudarmstadt.ukp.inception.scheduling.TaskState.FAILED; import static de.tudarmstadt.ukp.inception.scheduling.TaskState.NOT_STARTED; +import static java.lang.System.currentTimeMillis; import static java.util.Arrays.asList; import java.util.Deque; @@ -60,7 +61,7 @@ public TaskMonitor(Task aTask) user = aTask.getUser().map(User::getUsername).orElse(null); project = aTask.getProject(); title = aTask.getTitle(); - createTime = System.currentTimeMillis(); + createTime = currentTimeMillis(); cancellable = aTask.isCancellable(); } @@ -96,15 +97,15 @@ public String getType() public synchronized void setState(TaskState aState) { - state = aState; - if (state == NOT_STARTED && aState != NOT_STARTED) { - startTime = System.currentTimeMillis(); + startTime = currentTimeMillis(); } if (asList(COMPLETED, CANCELLED, FAILED).contains(aState)) { - endTime = System.currentTimeMillis(); + endTime = currentTimeMillis(); } + + state = aState; } public synchronized long getCreateTime() @@ -219,4 +220,17 @@ public synchronized Progress toProgress() { return new Progress(progress, maxProgress); } + + public long getDuration() + { + if (startTime < 0) { + return -1; + } + + if (endTime > 0) { + return endTime - startTime; + } + + return currentTimeMillis() - startTime; + } } diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/controller/SchedulerWebsocketController.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/controller/SchedulerWebsocketController.java index 4d30425514f..8cfc64df780 100644 --- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/controller/SchedulerWebsocketController.java +++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/controller/SchedulerWebsocketController.java @@ -53,7 +53,7 @@ static String getProjectTaskUpdatesTopic(long aProjectId) { var properties = new Properties(); properties.setProperty(PARAM_PROJECT, String.valueOf(aProjectId)); - var replacer = new PropertyPlaceholderHelper("{", "}", null, false); + var replacer = new PropertyPlaceholderHelper("{", "}", null, null, false); var topic = replacer.replacePlaceholders(PROJECT_TASKS_TOPIC_TEMPLATE, properties); return topic; } diff --git a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/SearchServiceImpl.java b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/SearchServiceImpl.java index 664c260f27e..14e7982eecb 100644 --- a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/SearchServiceImpl.java +++ b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/SearchServiceImpl.java @@ -70,6 +70,7 @@ import de.tudarmstadt.ukp.inception.documents.event.BeforeDocumentRemovedEvent; import de.tudarmstadt.ukp.inception.preferences.PreferencesService; import de.tudarmstadt.ukp.inception.project.api.ProjectService; +import de.tudarmstadt.ukp.inception.project.api.event.AfterProjectCreatedEvent; import de.tudarmstadt.ukp.inception.project.api.event.AfterProjectRemovedEvent; import de.tudarmstadt.ukp.inception.project.api.event.BeforeProjectRemovedEvent; import de.tudarmstadt.ukp.inception.scheduling.Progress; @@ -81,7 +82,6 @@ import de.tudarmstadt.ukp.inception.search.config.SearchServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.search.config.SearchServiceProperties; import de.tudarmstadt.ukp.inception.search.index.IndexRebuildRequiredException; -import de.tudarmstadt.ukp.inception.search.index.PhysicalIndexFactory; import de.tudarmstadt.ukp.inception.search.index.PhysicalIndexRegistry; import de.tudarmstadt.ukp.inception.search.model.BulkIndexingContext; import de.tudarmstadt.ukp.inception.search.model.Index; @@ -271,8 +271,7 @@ private synchronized Index loadIndex(long aProjectId) } // Get the index factory - PhysicalIndexFactory indexFactory = physicalIndexRegistry - .getIndexFactory(DEFAULT_PHSYICAL_INDEX_FACTORY); + var indexFactory = physicalIndexRegistry.getIndexFactory(DEFAULT_PHSYICAL_INDEX_FACTORY); if (indexFactory == null) { return null; @@ -284,6 +283,17 @@ private synchronized Index loadIndex(long aProjectId) return index; } + @EventListener + public void onAfterProjectCreated(AfterProjectCreatedEvent aEvent) + { + LOG.trace("Starting onAfterProjectCreated"); + + // This will help us block off individual indexDocument tasks that are being triggered + // while the project is imported or initialized (if those processes run in a tasks-suspended + // context. + enqueueReindexTask(aEvent.getProject(), "onAfterProjectCreated"); + } + /** * Triggered before a project is removed. * @@ -293,9 +303,9 @@ private synchronized Index loadIndex(long aProjectId) * if the index cannot be removed. */ @EventListener - public void beforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws IOException + public void onBeforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws IOException { - Project project = aEvent.getProject(); + var project = aEvent.getProject(); if (isBeingDeleted(project.getId())) { LOG.trace( @@ -325,7 +335,7 @@ public void beforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws IOExcep } @EventListener - public void afterProjectRemove(AfterProjectRemovedEvent aEvent) throws IOException + public void onAfterProjectRemove(AfterProjectRemovedEvent aEvent) throws IOException { synchronized (indexes) { indexes.remove(aEvent.getProject().getId()); @@ -388,7 +398,7 @@ private PooledIndex acquireIndex(long aProjectId) } @EventListener - public void beforeDocumentRemove(BeforeDocumentRemovedEvent aEvent) throws IOException + public void onBeforeDocumentRemove(BeforeDocumentRemovedEvent aEvent) throws IOException { var document = aEvent.getDocument(); var project = document.getProject(); diff --git a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/IndexingTask_ImplBase.java b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/IndexingTask_ImplBase.java index 3fb30d8aeac..fc438fd1211 100644 --- a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/IndexingTask_ImplBase.java +++ b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/IndexingTask_ImplBase.java @@ -62,15 +62,19 @@ public String toString() var builder = new StringBuilder(); builder.append(getClass().getSimpleName()); builder.append(" [project="); - builder.append(getProject().getName()); + builder.append(getProject()); if (getUser().isPresent()) { builder.append(", user="); builder.append(getUser().get()); } - builder.append(", sourceDocument="); - builder.append(sourceDocument == null ? "null" : sourceDocument.getName()); - builder.append(", annotationDocument="); - builder.append(annotationDocument == null ? "null" : annotationDocument.getName()); + if (sourceDocument != null) { + builder.append(", sourceDocument="); + builder.append(sourceDocument.getName()); + } + if (annotationDocument != null) { + builder.append(", annotationDocument="); + builder.append(annotationDocument.getName()); + } builder.append(", trigger="); builder.append(getTrigger()); builder.append("]"); diff --git a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/ReindexTask.java b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/ReindexTask.java index bafa0f3c664..a7973259bb8 100644 --- a/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/ReindexTask.java +++ b/inception/inception-search-core/src/main/java/de/tudarmstadt/ukp/inception/search/scheduling/tasks/ReindexTask.java @@ -21,6 +21,7 @@ */ package de.tudarmstadt.ukp.inception.search.scheduling.tasks; +import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.DISCARD_OR_QUEUE_THIS; import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.NO_MATCH; import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.UNQUEUE_EXISTING_AND_QUEUE_THIS; import static de.tudarmstadt.ukp.inception.scheduling.TaskScope.PROJECT; @@ -82,14 +83,20 @@ public Progress getProgress() public MatchResult matches(Task aTask) { // If a re-indexing task for a project is coming in, we can throw out any scheduled tasks - // for re-indexing and for indexing individual source/annotation documents in the project. - if (aTask instanceof ReindexTask || aTask instanceof IndexSourceDocumentTask + // for indexing individual source/annotation documents in the project. + if (aTask instanceof IndexSourceDocumentTask || aTask instanceof IndexAnnotationDocumentTask) { if (Objects.equals(getProject().getId(), aTask.getProject().getId())) { return UNQUEUE_EXISTING_AND_QUEUE_THIS; } } + if (aTask instanceof ReindexTask) { + if (Objects.equals(getProject().getId(), aTask.getProject().getId())) { + return DISCARD_OR_QUEUE_THIS; + } + } + return NO_MATCH; } diff --git a/inception/inception-ui-dashboard/pom.xml b/inception/inception-ui-dashboard/pom.xml index 44862ca7fef..ce816ecd5bb 100644 --- a/inception/inception-ui-dashboard/pom.xml +++ b/inception/inception-ui-dashboard/pom.xml @@ -106,6 +106,10 @@ de.tudarmstadt.ukp.inception.app inception-support-bootstrap + + de.tudarmstadt.ukp.inception.app + inception-scheduling + org.apache.commons diff --git a/inception/inception-ui-dashboard/src/main/java/de/tudarmstadt/ukp/inception/ui/core/dashboard/projectlist/ProjectTemplateSelectionDialogPanel.java b/inception/inception-ui-dashboard/src/main/java/de/tudarmstadt/ukp/inception/ui/core/dashboard/projectlist/ProjectTemplateSelectionDialogPanel.java index ad301127b8f..eabc99ec43b 100644 --- a/inception/inception-ui-dashboard/src/main/java/de/tudarmstadt/ukp/inception/ui/core/dashboard/projectlist/ProjectTemplateSelectionDialogPanel.java +++ b/inception/inception-ui-dashboard/src/main/java/de/tudarmstadt/ukp/inception/ui/core/dashboard/projectlist/ProjectTemplateSelectionDialogPanel.java @@ -26,7 +26,6 @@ import static java.util.Arrays.asList; import static org.slf4j.LoggerFactory.getLogger; -import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; @@ -56,6 +55,7 @@ import de.tudarmstadt.ukp.inception.project.api.ProjectInitializationRequest; import de.tudarmstadt.ukp.inception.project.api.ProjectInitializer; import de.tudarmstadt.ukp.inception.project.api.ProjectService; +import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; import de.tudarmstadt.ukp.inception.support.lambda.LambdaAjaxFormComponentUpdatingBehavior; import de.tudarmstadt.ukp.inception.support.lambda.LambdaAjaxLink; import de.tudarmstadt.ukp.inception.ui.core.dashboard.project.ProjectDashboardPage; @@ -79,6 +79,7 @@ public class ProjectTemplateSelectionDialogPanel private @SpringBean ProjectService projectService; private @SpringBean UserDao userRepository; private @SpringBean PreferencesService preferencesService; + private @SpringBean SchedulingService schedulingService; private LambdaAjaxLink closeDialogButton; private boolean includeSampleData; @@ -153,13 +154,14 @@ private List listInitializers() private void actionCreateProject(AjaxRequestTarget aTarget, ProjectInitializer aInitializer) { - var user = userRepository.getCurrentUser(); aTarget.addChildren(getPage(), IFeedback.class); + + var user = userRepository.getCurrentUser(); var projectSlug = projectService.deriveSlugFromName(user.getUsername()); projectSlug = projectService.deriveUniqueSlug(projectSlug); - try { - var project = new Project(projectSlug); + var project = new Project(projectSlug); + try (var ctx = schedulingService.whileSuspended(project)) { project.setName(user.getUsername() + " - New project"); projectService.createProject(project); projectService.assignRole(project, user, ANNOTATOR, CURATOR, MANAGER); @@ -175,7 +177,7 @@ private void actionCreateProject(AjaxRequestTarget aTarget, ProjectInitializer a ProjectPageBase.setProjectPageParameter(pageParameters, project); setResponsePage(ProjectDashboardPage.class, pageParameters); } - catch (IOException e) { + catch (Exception e) { LOG.error("Unable to create project [{}]", projectSlug, e); error("Unable to create project [" + projectSlug + "]"); } diff --git a/inception/inception-ui-scheduling/src/main/java/de/tudarmstadt/ukp/inception/ui/scheduling/controller/SchedulerWebsocketControllerImpl.java b/inception/inception-ui-scheduling/src/main/java/de/tudarmstadt/ukp/inception/ui/scheduling/controller/SchedulerWebsocketControllerImpl.java index 204cb573f68..bcc5000843e 100644 --- a/inception/inception-ui-scheduling/src/main/java/de/tudarmstadt/ukp/inception/ui/scheduling/controller/SchedulerWebsocketControllerImpl.java +++ b/inception/inception-ui-scheduling/src/main/java/de/tudarmstadt/ukp/inception/ui/scheduling/controller/SchedulerWebsocketControllerImpl.java @@ -62,7 +62,7 @@ public SchedulerWebsocketControllerImpl(SchedulingService aSchedulingService, schedulingService = aSchedulingService; } - @SubscribeMapping(SchedulerWebsocketController.USER_TASKS_TOPIC) + @SubscribeMapping(USER_TASKS_TOPIC) public List onSubscribeToUserTaskUpdates(Principal user) throws AccessDeniedException { @@ -97,8 +97,8 @@ public List onSubscribeToProjectTaskUpdates( public void dispatch(MTaskStateUpdate aUpdate) { if (aUpdate.getUsername() != null) { - msgTemplate.convertAndSendToUser(aUpdate.getUsername(), - "/queue" + SchedulerWebsocketController.USER_TASKS_TOPIC, aUpdate); + msgTemplate.convertAndSendToUser(aUpdate.getUsername(), "/queue" + USER_TASKS_TOPIC, + aUpdate); } if (aUpdate.getProjectId() > 0) { diff --git a/inception/inception-workload-matrix/src/main/java/de/tudarmstadt/ukp/inception/workload/matrix/event/MatrixWorkloadStateWatcher.java b/inception/inception-workload-matrix/src/main/java/de/tudarmstadt/ukp/inception/workload/matrix/event/MatrixWorkloadStateWatcher.java index c1ce8d9164d..a66815d516c 100644 --- a/inception/inception-workload-matrix/src/main/java/de/tudarmstadt/ukp/inception/workload/matrix/event/MatrixWorkloadStateWatcher.java +++ b/inception/inception-workload-matrix/src/main/java/de/tudarmstadt/ukp/inception/workload/matrix/event/MatrixWorkloadStateWatcher.java @@ -22,8 +22,8 @@ import de.tudarmstadt.ukp.inception.documents.event.AnnotationStateChangeEvent; import de.tudarmstadt.ukp.inception.project.api.event.ProjectPermissionsChangedEvent; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; -import de.tudarmstadt.ukp.inception.workload.event.RecalculateProjectStateTask; import de.tudarmstadt.ukp.inception.workload.matrix.config.MatrixWorkloadManagerAutoConfiguration; +import de.tudarmstadt.ukp.inception.workload.task.RecalculateProjectStateTask; /** * Watches the state of the annotations and documents in matrix projects. diff --git a/inception/inception-workload/pom.xml b/inception/inception-workload/pom.xml index 910b6a0e826..2bd1e790e7c 100644 --- a/inception/inception-workload/pom.xml +++ b/inception/inception-workload/pom.xml @@ -59,6 +59,10 @@ de.tudarmstadt.ukp.inception.app inception-support-bootstrap + + de.tudarmstadt.ukp.inception.app + inception-documents-api + org.apache.wicket diff --git a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/config/WorkloadManagementAutoConfiguration.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/config/WorkloadManagementAutoConfiguration.java index d07e5eb29c7..15bf950a41d 100644 --- a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/config/WorkloadManagementAutoConfiguration.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/config/WorkloadManagementAutoConfiguration.java @@ -25,13 +25,13 @@ import org.springframework.context.annotation.Lazy; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; -import de.tudarmstadt.ukp.inception.workload.event.exporter.WorkloadManagerExporter; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtension; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtensionPoint; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtensionPointImpl; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManagementService; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManagementServiceImpl; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManager; +import de.tudarmstadt.ukp.inception.workload.task.exporter.WorkloadManagerExporter; import jakarta.persistence.EntityManager; @Configuration diff --git a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/model/WorkloadManagementServiceImpl.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/model/WorkloadManagementServiceImpl.java index 77d51882ab7..5b979cc5a87 100644 --- a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/model/WorkloadManagementServiceImpl.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/model/WorkloadManagementServiceImpl.java @@ -34,9 +34,9 @@ import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.inception.scheduling.SchedulingService; import de.tudarmstadt.ukp.inception.workload.config.WorkloadManagementAutoConfiguration; -import de.tudarmstadt.ukp.inception.workload.event.RecalculateProjectStateTask; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtension; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtensionPoint; +import de.tudarmstadt.ukp.inception.workload.task.RecalculateProjectStateTask; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; diff --git a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/RecalculateProjectStateTask.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/RecalculateProjectStateTask.java similarity index 77% rename from inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/RecalculateProjectStateTask.java rename to inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/RecalculateProjectStateTask.java index 7dc57705d90..871e779fb42 100644 --- a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/RecalculateProjectStateTask.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/RecalculateProjectStateTask.java @@ -15,8 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.workload.event; +package de.tudarmstadt.ukp.inception.workload.task; +import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.NO_MATCH; +import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.UNQUEUE_EXISTING_AND_QUEUE_THIS; import static java.time.Duration.ofSeconds; import java.util.Objects; @@ -26,12 +28,17 @@ import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.inception.project.api.ProjectService; import de.tudarmstadt.ukp.inception.scheduling.DebouncingTask; +import de.tudarmstadt.ukp.inception.scheduling.MatchResult; +import de.tudarmstadt.ukp.inception.scheduling.MatchableTask; +import de.tudarmstadt.ukp.inception.scheduling.Task; import de.tudarmstadt.ukp.inception.workload.extension.WorkloadManagerExtension; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManagementService; import jakarta.persistence.NoResultException; public class RecalculateProjectStateTask extends DebouncingTask + implements MatchableTask + { public static final String TYPE = "RecalculateProjectStateTask"; @@ -67,6 +74,21 @@ public void execute() ext.recalculate(project); } + @Override + public MatchResult matches(Task aTask) + { + // If a recalculation task for a project is coming in, we can throw out any scheduled tasks + // for updating in the project. + if (aTask instanceof RecalculateProjectStateTask + || aTask instanceof UpdateProjectStateTask) { + if (Objects.equals(getProject().getId(), aTask.getProject().getId())) { + return UNQUEUE_EXISTING_AND_QUEUE_THIS; + } + } + + return NO_MATCH; + } + @Override public boolean equals(Object o) { diff --git a/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/UpdateProjectStateTask.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/UpdateProjectStateTask.java similarity index 76% rename from inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/UpdateProjectStateTask.java rename to inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/UpdateProjectStateTask.java index a1e633120ff..7538e0fccb7 100644 --- a/inception/inception-documents/src/main/java/de/tudarmstadt/ukp/inception/documents/UpdateProjectStateTask.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/UpdateProjectStateTask.java @@ -15,8 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.documents; +package de.tudarmstadt.ukp.inception.workload.task; +import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.DISCARD_OR_QUEUE_THIS; +import static de.tudarmstadt.ukp.inception.scheduling.MatchResult.NO_MATCH; import static java.time.Duration.ofSeconds; import java.util.Objects; @@ -25,19 +27,19 @@ import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.inception.documents.api.DocumentService; -import de.tudarmstadt.ukp.inception.documents.api.SourceDocumentStateStats; import de.tudarmstadt.ukp.inception.project.api.ProjectService; import de.tudarmstadt.ukp.inception.scheduling.DebouncingTask; -import jakarta.persistence.EntityManager; +import de.tudarmstadt.ukp.inception.scheduling.MatchResult; +import de.tudarmstadt.ukp.inception.scheduling.MatchableTask; +import de.tudarmstadt.ukp.inception.scheduling.Task; import jakarta.persistence.NoResultException; -import jakarta.persistence.PersistenceContext; public class UpdateProjectStateTask extends DebouncingTask + implements MatchableTask { public static final String TYPE = "UpdateProjectStateTask"; - private @PersistenceContext EntityManager entityManager; private @Autowired ProjectService projectService; private @Autowired DocumentService documentService; @@ -66,11 +68,25 @@ public void execute() return; } - SourceDocumentStateStats stats = documentService.getSourceDocumentStats(project); + var stats = documentService.getSourceDocumentStats(project); projectService.setProjectState(project, stats.getProjectState()); } + @Override + public MatchResult matches(Task aTask) + { + // If a re-calculation task for the project is scheduled, we do not need to schedule a new + // update task + if (aTask instanceof RecalculateProjectStateTask reCalcTask) { + if (Objects.equals(reCalcTask.getProject().getId(), getProject().getId())) { + return DISCARD_OR_QUEUE_THIS; + } + } + + return NO_MATCH; + } + @Override public boolean equals(Object o) { diff --git a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/ExportedWorkloadManager.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/ExportedWorkloadManager.java similarity index 96% rename from inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/ExportedWorkloadManager.java rename to inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/ExportedWorkloadManager.java index 78a4337fa2f..b1309ef5358 100644 --- a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/ExportedWorkloadManager.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/ExportedWorkloadManager.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.workload.event.exporter; +package de.tudarmstadt.ukp.inception.workload.task.exporter; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/WorkloadManagerExporter.java b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/WorkloadManagerExporter.java similarity index 98% rename from inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/WorkloadManagerExporter.java rename to inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/WorkloadManagerExporter.java index 6df0b7e0ef8..7c3b0ed6827 100644 --- a/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/event/exporter/WorkloadManagerExporter.java +++ b/inception/inception-workload/src/main/java/de/tudarmstadt/ukp/inception/workload/task/exporter/WorkloadManagerExporter.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.workload.event.exporter; +package de.tudarmstadt.ukp.inception.workload.task.exporter; import java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; diff --git a/inception/inception-workload/src/test/java/de/tudarmstadt/ukp/inception/workload/exporter/WorkloadManagerExporterTest.java b/inception/inception-workload/src/test/java/de/tudarmstadt/ukp/inception/workload/exporter/WorkloadManagerExporterTest.java index 191c8be780a..a3c95e7451e 100644 --- a/inception/inception-workload/src/test/java/de/tudarmstadt/ukp/inception/workload/exporter/WorkloadManagerExporterTest.java +++ b/inception/inception-workload/src/test/java/de/tudarmstadt/ukp/inception/workload/exporter/WorkloadManagerExporterTest.java @@ -37,9 +37,9 @@ import de.tudarmstadt.ukp.clarin.webanno.api.export.ProjectImportRequest; import de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedProject; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.inception.workload.event.exporter.WorkloadManagerExporter; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManagementService; import de.tudarmstadt.ukp.inception.workload.model.WorkloadManager; +import de.tudarmstadt.ukp.inception.workload.task.exporter.WorkloadManagerExporter; @ExtendWith(MockitoExtension.class) public class WorkloadManagerExporterTest