Skip to content

Commit

Permalink
feat/met 5806 support embeddable resources profile (#675)
Browse files Browse the repository at this point in the history
* MET-5806 add oEmbed detection for web resources

* MET-5806 add tika detection for oEmbedded items

* MET-5806 refactor unit tests

* MET-5806 add complementary unit tests

* MET-5806 correct file detection implementation and update unit tests

* MET-5806 added code review comments

* MET-5806 remove unused imports and optimizations

* MET-5806 remove chain of responsibility pattern, start refactor

* MET-5806 add xpath condition for oembed objects, pending fix unit tests

* MET-5806 updated textprocessor and oembedprocessor with unit tests

* MET-5806 add sonarqube recommendations

* MET-5806 sonarcloud recommendations improvements

* MET-5806 sonarcloud improvement recommendations part II

* MET-5806 sonarcloud recommendations part III

* MET-5806 Fix sonar issues

---------

Co-authored-by: Simon Tzanakis <[email protected]>
  • Loading branch information
jeortizquan and stzanakis authored Aug 15, 2024
1 parent 6457c23 commit 09d064e
Show file tree
Hide file tree
Showing 32 changed files with 2,037 additions and 207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ public class RdfNamespaceContext implements NamespaceContext {
public static final String RDF_NAMESPACE_PREFIX = "rdf";
public static final String EDM_NAMESPACE_PREFIX = "edm";
public static final String ORE_NAMESPACE_PREFIX = "ore";
public static final String SVCS_NAMESPACE_PREFIX = "svcs";
public static final String DCTERMS_NAMESPACE_PREFIX = "dcterms";

private static final Map<String, String> PREFIX_TO_NAMESPACE_MAP = new HashMap<>();

Expand All @@ -30,6 +32,8 @@ public class RdfNamespaceContext implements NamespaceContext {
PREFIX_TO_NAMESPACE_MAP.put(RDF_NAMESPACE_PREFIX, "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
PREFIX_TO_NAMESPACE_MAP.put(ORE_NAMESPACE_PREFIX, "http://www.openarchives.org/ore/terms/");
PREFIX_TO_NAMESPACE_MAP.put(EDM_NAMESPACE_PREFIX, "http://www.europeana.eu/schemas/edm/");
PREFIX_TO_NAMESPACE_MAP.put(SVCS_NAMESPACE_PREFIX,"http://rdfs.org/sioc/services#");
PREFIX_TO_NAMESPACE_MAP.put(DCTERMS_NAMESPACE_PREFIX, "http://purl.org/dc/terms/");
}

@Override
Expand Down
5 changes: 5 additions & 0 deletions metis-media-service/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,10 @@
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package eu.europeana.metis.mediaprocessing;

/**
 * XPath string constants for addressing the {@code rdf:RDF} root, its {@code ore:Aggregation} child and the
 * {@code rdf:resource} attribute of several {@code edm:} elements below that aggregation.
 */
public final class RdfXpathConstants {

  /** Path step selecting the {@code rdf:resource} attribute of the preceding element. */
  private static final String RESOURCE_ATTRIBUTE = "/@rdf:resource";

  /** XPath of the {@code rdf:RDF} root element. */
  public static final String RDF_NAMESPACE = "/rdf:RDF";

  /** XPath of the {@code ore:Aggregation} element directly below the root. */
  public static final String ORE_AGGREGATION = RDF_NAMESPACE + "/ore:Aggregation";

  /** XPath of the {@code rdf:resource} attribute of {@code edm:object}. */
  public static final String EDM_OBJECT = ORE_AGGREGATION + "/edm:object" + RESOURCE_ATTRIBUTE;

  /** XPath of the {@code rdf:resource} attribute of {@code edm:isShownBy}. */
  public static final String EDM_IS_SHOWN_BY = ORE_AGGREGATION + "/edm:isShownBy" + RESOURCE_ATTRIBUTE;

  /** XPath of the {@code rdf:resource} attribute of {@code edm:hasView}. */
  public static final String EDM_HAS_VIEW = ORE_AGGREGATION + "/edm:hasView" + RESOURCE_ATTRIBUTE;

  /** XPath of the {@code rdf:resource} attribute of {@code edm:isShownAt}. */
  public static final String EDM_IS_SHOWN_AT = ORE_AGGREGATION + "/edm:isShownAt" + RESOURCE_ATTRIBUTE;

  private RdfXpathConstants() {
    // Constants holder: not meant to be instantiated.
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@
class AudioVideoProcessor implements MediaProcessor {

private static final Logger LOGGER = LoggerFactory.getLogger(AudioVideoProcessor.class);
public static final int FFPROBE_MAX_VERSION = 7;
public static final int FFPROBE_MIN_VERSION = 2;

private static String globalFfprobeCommand;

private final CommandExecutor commandExecutor;

private final String ffprobeCommand;

/**
Expand Down Expand Up @@ -97,7 +100,7 @@ static String discoverFfprobeCommand(CommandExecutor commandExecutor)
int indexVersion = output.lastIndexOf("version ") + "version ".length();
int version = Character.isDigit(output.charAt(indexVersion)) ?
Integer.parseInt(String.valueOf(output.charAt(indexVersion))) : 0;
if (!(version >= 2 && version < 7)) {
if (!(version >= FFPROBE_MIN_VERSION && version < FFPROBE_MAX_VERSION)) {
throw new MediaProcessorException("ffprobe version " + version + ".x not found");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ public ResourceExtractionResult copyMetadata(Resource resource, String detectedM
public boolean downloadResourceForFullProcessing() {
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.apache.tika.io.TikaInputStream;
Expand Down Expand Up @@ -51,27 +52,27 @@ enum ProcessingMode {FULL, REDUCED, NONE}
private final AudioVideoProcessor audioVideoProcessor;
private final TextProcessor textProcessor;
private final Media3dProcessor media3dProcessor;
private final OEmbedProcessor oEmbedProcessor;

/**
* Constructor meant for testing purposes.
*
* @param resourceDownloadClient The download client for resources.
* @param mimeTypeDetectHttpClient The mime type detector for URLs.
* @param tika A tika instance.
* @param imageProcessor An image processor.
* @param audioVideoProcessor An audio/video processor.
* @param textProcessor A text processor.
* @param mediaProcessorList the media processor list
*/
MediaExtractorImpl(ResourceDownloadClient resourceDownloadClient,
MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika, ImageProcessor imageProcessor,
AudioVideoProcessor audioVideoProcessor, TextProcessor textProcessor, Media3dProcessor media3dProcessor) {
MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika,
List<MediaProcessor> mediaProcessorList) {
this.resourceDownloadClient = resourceDownloadClient;
this.mimeTypeDetectHttpClient = mimeTypeDetectHttpClient;
this.tika = tika;
this.imageProcessor = imageProcessor;
this.audioVideoProcessor = audioVideoProcessor;
this.textProcessor = textProcessor;
this.media3dProcessor = media3dProcessor;
this.imageProcessor = (ImageProcessor) getMediaProcessor(mediaProcessorList, ImageProcessor.class);
this.audioVideoProcessor = (AudioVideoProcessor) getMediaProcessor(mediaProcessorList, AudioVideoProcessor.class);
this.textProcessor = (TextProcessor) getMediaProcessor(mediaProcessorList, TextProcessor.class);
this.media3dProcessor = (Media3dProcessor) getMediaProcessor(mediaProcessorList, Media3dProcessor.class);
this.oEmbedProcessor = (OEmbedProcessor) getMediaProcessor(mediaProcessorList, OEmbedProcessor.class);
}

/**
Expand Down Expand Up @@ -102,6 +103,16 @@ public MediaExtractorImpl(int redirectCount, int thumbnailGenerateTimeout,
this.textProcessor = new TextProcessor(thumbnailGenerator,
new PdfToImageConverter(new CommandExecutor(thumbnailGenerateTimeout)));
this.media3dProcessor = new Media3dProcessor();
this.oEmbedProcessor = new OEmbedProcessor();
}

/**
 * Finds the first element of the given list that is an instance of the requested type.
 *
 * <p>The result is returned as {@code T}, so callers do not need to cast; existing callers that still cast the
 * result keep compiling unchanged.
 *
 * @param mediaProcessorList the candidates to search; {@code null} elements never match
 * @param type the class of the element to look for
 * @param <T> the requested type
 * @return the first element that is an instance of {@code type}, or {@code null} if there is none
 */
private <T> T getMediaProcessor(List<?> mediaProcessorList, Class<T> type) {
  for (Object candidate : mediaProcessorList) {
    if (type.isInstance(candidate)) {
      // Class#cast keeps the conversion checked, so no unchecked warning is introduced.
      return type.cast(candidate);
    }
  }
  return null;
}

@Override
Expand Down Expand Up @@ -193,10 +204,10 @@ String detectType(Path path, String providedMimeType) throws IOException {
}
}

MediaProcessor chooseMediaProcessor(MediaType mediaType) {
MediaProcessor chooseMediaProcessor(MediaType mediaType, String detectedMimeType) {
final MediaProcessor processor;
switch (mediaType) {
case TEXT -> processor = textProcessor;
case TEXT, OTHER -> processor = chooseByDetectedMimeType(mediaType, detectedMimeType);
case AUDIO, VIDEO -> processor = audioVideoProcessor;
case IMAGE -> processor = imageProcessor;
case THREE_D -> processor = media3dProcessor;
Expand All @@ -205,6 +216,20 @@ MediaProcessor chooseMediaProcessor(MediaType mediaType) {
return processor;
}

/**
 * Chooses between the oEmbed processor and the text processor based on the detected mime type.
 *
 * @param mediaType the media type of the resource
 * @param detectedMimeType the mime type detected for the resource, may be {@code null}
 * @return the oEmbed processor for TEXT/OTHER resources whose mime type starts with {@code text/xml},
 * {@code application/xml} or {@code application/json}; otherwise the text processor for TEXT resources;
 * {@code null} in every other case, including a {@code null} mime type
 */
MediaProcessor chooseByDetectedMimeType(MediaType mediaType, String detectedMimeType) {
  if (detectedMimeType == null) {
    return null;
  }

  final boolean isText = mediaType == MediaType.TEXT;
  final boolean hasXmlOrJsonMimeType = detectedMimeType.startsWith("text/xml")
      || detectedMimeType.startsWith("application/xml")
      || detectedMimeType.startsWith("application/json");

  // XML and JSON documents are offered to the oEmbed processor first.
  if (hasXmlOrJsonMimeType && (isText || mediaType == MediaType.OTHER)) {
    return oEmbedProcessor;
  }
  return isText ? textProcessor : null;
}

void verifyAndCorrectContentAvailability(Resource resource, ProcessingMode mode,
String detectedMimeType) throws MediaExtractionException, IOException {

Expand Down Expand Up @@ -255,19 +280,32 @@ ResourceExtractionResult performProcessing(Resource resource, ProcessingMode mod
}

// Choose the right media processor.
final MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType));
MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType), detectedMimeType);

// Process the resource depending on the mode.
final ResourceExtractionResult result;
ResourceExtractionResult result;
if (processor == null) {
result = null;
} else if (mode == ProcessingMode.FULL) {
} else {
result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType);
}
// No oEmbed detected try with text processing
if (processor instanceof OEmbedProcessor && result == null) {
processor = textProcessor;
result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType);
}
// Done
return result;
}

/**
 * Runs the given processor on the resource in the way the processing mode asks for.
 *
 * @param resource the resource to process
 * @param mode the processing mode; {@link ProcessingMode#FULL} delegates to
 * {@link MediaProcessor#extractMetadata}, every other mode to {@link MediaProcessor#copyMetadata}
 * @param mainThumbnailAvailable passed through to the processor when extracting metadata
 * @param processor the processor to delegate to
 * @param detectedMimeType the mime type detected for the resource
 * @return whatever the processor returned
 * @throws MediaExtractionException if the processor fails
 */
private static ResourceExtractionResult getResourceExtractionResult(Resource resource, ProcessingMode mode,
    boolean mainThumbnailAvailable, MediaProcessor processor, String detectedMimeType) throws MediaExtractionException {
  if (mode != ProcessingMode.FULL) {
    return processor.copyMetadata(resource, detectedMimeType);
  }
  return processor.extractMetadata(resource, detectedMimeType, mainThumbnailAvailable);
}

Expand All @@ -281,7 +319,7 @@ public void close() throws IOException {
* @return true if and only if resources of the given type need to be downloaded before performing full processing.
*/
boolean shouldDownloadForFullProcessing(String mimeType) {
return Optional.of(MediaType.getMediaType(mimeType)).map(this::chooseMediaProcessor)
return Optional.of(MediaType.getMediaType(mimeType)).map(mediaType -> chooseMediaProcessor(mediaType, mimeType))
.map(MediaProcessor::downloadResourceForFullProcessing).orElse(Boolean.FALSE);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package eu.europeana.metis.mediaprocessing.extraction;

import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.checkValidWidthAndHeightDimensions;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getDurationFromModel;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromJson;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromXml;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo;

import eu.europeana.metis.mediaprocessing.exception.MediaExtractionException;
import eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedModel;
import eu.europeana.metis.mediaprocessing.model.ImageResourceMetadata;
import eu.europeana.metis.mediaprocessing.model.Resource;
import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResult;
import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResultImpl;
import eu.europeana.metis.mediaprocessing.model.VideoResourceMetadata;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Locale;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Media processor for oEmbed documents: it parses a downloaded JSON or XML document into an {@link OEmbedModel}
 * and, when that model is a valid oEmbed photo or video, converts it into image or video resource metadata.
 */
public class OEmbedProcessor implements MediaProcessor {

  private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedProcessor.class);

  private static final String JSON_MIME_TYPE = "application/json";
  private static final String APPLICATION_XML_MIME_TYPE = "application/xml";
  private static final String TEXT_XML_MIME_TYPE = "text/xml";

  /**
   * Process a resource by extracting the metadata from the content.
   *
   * @param resource The resource to process. Note that the resource may not have content (see
   * {@link MediaExtractorImpl#shouldDownloadForFullProcessing(String)}).
   * @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided
   * by the server and which is stored in {@link Resource#getProvidedMimeType()}).
   * @param mainThumbnailAvailable Whether the main thumbnail for this record is available. Not used by this processor.
   * @return The result of the processing, or null if the resource has no content or the content is not a valid oEmbed
   * photo or video.
   * @throws MediaExtractionException In case the content could not be read or the metadata could not be created.
   */
  @Override
  public ResourceExtractionResult extractMetadata(Resource resource, String detectedMimeType, boolean mainThumbnailAvailable)
      throws MediaExtractionException {

    // The content for this oEmbed needs to be downloaded to be examined.
    if (resource.getContentPath() == null) {
      return null;
    }

    try {
      final OEmbedModel embedModel = readOEmbedModel(resource, detectedMimeType);
      if (!isValidOEmbedPhotoOrVideo(embedModel)) {
        LOGGER.warn("No oembed model found");
        return null;
      }
      checkValidWidthAndHeightDimensions(embedModel, resource.getResourceUrl());
      return getResourceExtractionResult(resource, detectedMimeType, embedModel);
    } catch (IOException e) {
      throw new MediaExtractionException("Unable to read OEmbedded resource", e);
    }
  }

  /**
   * Process a resource by copying the metadata from the input without performing any extraction.
   *
   * @param resource The resource to process. The resource is not expected to have content.
   * @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided
   * by the server and which is stored in {@link Resource#getProvidedMimeType()}).
   * @return Always null: this processor only produces a result from the downloaded content.
   * @throws MediaExtractionException Never thrown by this implementation.
   */
  @Override
  public ResourceExtractionResult copyMetadata(Resource resource, String detectedMimeType) throws MediaExtractionException {
    return null;
  }

  /**
   * @return Whether the processor needs the downloaded resource for full processing (always true).
   */
  @Override
  public boolean downloadResourceForFullProcessing() {
    return true;
  }

  /**
   * Parses the downloaded content of the resource with the parser that matches the detected mime type.
   *
   * <p>Both {@code application/xml} and {@code text/xml} are parsed as XML: {@code text/xml} documents are also
   * routed to this processor, so leaving it out would make every such oEmbed response go undetected.
   *
   * @param resource the resource whose content path points at the downloaded document
   * @param detectedMimeType the detected mime type, may be null
   * @return the parsed model, or null if the mime type is null or neither JSON nor XML
   * @throws IOException if the content cannot be read or parsed
   */
  private static OEmbedModel readOEmbedModel(Resource resource, String detectedMimeType) throws IOException {
    if (detectedMimeType == null) {
      return null;
    }
    final boolean json = detectedMimeType.startsWith(JSON_MIME_TYPE);
    final boolean xml = detectedMimeType.startsWith(APPLICATION_XML_MIME_TYPE)
        || detectedMimeType.startsWith(TEXT_XML_MIME_TYPE);
    if (!json && !xml) {
      return null;
    }
    final byte[] content = Files.readAllBytes(Paths.get(resource.getContentPath().toString()));
    return json ? getOEmbedModelFromJson(content) : getOEmbedModelFromXml(content);
  }

  /**
   * Converts an oEmbed model into resource metadata according to its type.
   *
   * @param resource the resource the model was read from
   * @param detectedMimeType the detected mime type, stored in the created metadata
   * @param oEmbedModel the parsed model, may be null
   * @return image metadata for type "photo", video metadata for type "video", null for a null model or any other type
   * @throws MediaExtractionException if the metadata could not be created
   */
  private ResourceExtractionResult getResourceExtractionResult(Resource resource, String detectedMimeType,
      OEmbedModel oEmbedModel) throws MediaExtractionException {
    if (oEmbedModel == null) {
      return null;
    }
    final ResourceExtractionResult resourceExtractionResult;
    // The type is compared case-insensitively.
    switch (oEmbedModel.getType().toLowerCase(Locale.US)) {
      case "photo" -> {
        ImageResourceMetadata imageResourceMetadata = new ImageResourceMetadata(detectedMimeType,
            resource.getResourceUrl(),
            resource.getProvidedFileSize(), oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null, null);
        resourceExtractionResult = new ResourceExtractionResultImpl(imageResourceMetadata);
      }
      case "video" -> {
        Double duration = getDurationFromModel(oEmbedModel);
        VideoResourceMetadata videoResourceMetadata = new VideoResourceMetadata(detectedMimeType,
            resource.getResourceUrl(),
            resource.getProvidedFileSize(), duration, null, oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null);
        resourceExtractionResult = new ResourceExtractionResultImpl(videoResourceMetadata);
      }
      default -> resourceExtractionResult = null;
    }
    return resourceExtractionResult;
  }
}
Loading

0 comments on commit 09d064e

Please sign in to comment.