diff --git a/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/RdfNamespaceContext.java b/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/RdfNamespaceContext.java index 7ccb90aac..a63e89673 100644 --- a/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/RdfNamespaceContext.java +++ b/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/RdfNamespaceContext.java @@ -19,6 +19,8 @@ public class RdfNamespaceContext implements NamespaceContext { public static final String RDF_NAMESPACE_PREFIX = "rdf"; public static final String EDM_NAMESPACE_PREFIX = "edm"; public static final String ORE_NAMESPACE_PREFIX = "ore"; + public static final String SVCS_NAMESPACE_PREFIX = "svcs"; + public static final String DCTERMS_NAMESPACE_PREFIX = "dcterms"; private static final Map PREFIX_TO_NAMESPACE_MAP = new HashMap<>(); @@ -30,6 +32,8 @@ public class RdfNamespaceContext implements NamespaceContext { PREFIX_TO_NAMESPACE_MAP.put(RDF_NAMESPACE_PREFIX, "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); PREFIX_TO_NAMESPACE_MAP.put(ORE_NAMESPACE_PREFIX, "http://www.openarchives.org/ore/terms/"); PREFIX_TO_NAMESPACE_MAP.put(EDM_NAMESPACE_PREFIX, "http://www.europeana.eu/schemas/edm/"); + PREFIX_TO_NAMESPACE_MAP.put(SVCS_NAMESPACE_PREFIX,"http://rdfs.org/sioc/services#"); + PREFIX_TO_NAMESPACE_MAP.put(DCTERMS_NAMESPACE_PREFIX, "http://purl.org/dc/terms/"); } @Override diff --git a/metis-media-service/pom.xml b/metis-media-service/pom.xml index 8471d48cb..9a1e1a976 100644 --- a/metis-media-service/pom.xml +++ b/metis-media-service/pom.xml @@ -130,5 +130,10 @@ org.wiremock wiremock-standalone + + org.mockito + mockito-junit-jupiter + test + diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImpl.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImpl.java index b11fe44dc..969acfa3e 100644 --- 
a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImpl.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImpl.java @@ -1,5 +1,10 @@ package eu.europeana.metis.mediaprocessing; +import static eu.europeana.metis.mediaprocessing.RdfXpathConstants.EDM_HAS_VIEW; +import static eu.europeana.metis.mediaprocessing.RdfXpathConstants.EDM_IS_SHOWN_AT; +import static eu.europeana.metis.mediaprocessing.RdfXpathConstants.EDM_IS_SHOWN_BY; +import static eu.europeana.metis.mediaprocessing.RdfXpathConstants.EDM_OBJECT; + import eu.europeana.metis.mediaprocessing.exception.RdfDeserializationException; import eu.europeana.metis.mediaprocessing.model.EnrichedRdf; import eu.europeana.metis.mediaprocessing.model.EnrichedRdfImpl; @@ -35,128 +40,85 @@ import org.xml.sax.SAXException; /** - * This implements RDF deserialization functionality. The code that obtains the individual resources - * does not assume that we can convert the record to an EDM internal format. Link checking must also - * run on EDM external. We therefore use XPath expressions to obtain the required data. - * - * TODO use {@link eu.europeana.metis.schema.convert.RdfConversionUtils} - no org.jibx.runtime.* - * import should remain. + * This implements RDF deserialization functionality. The code that obtains the individual resources does not assume that we can + * convert the record to an EDM internal format. Link checking must also run on EDM external. We therefore use XPath expressions + * to obtain the required data. + *

+ * TODO use {@link eu.europeana.metis.schema.convert.RdfConversionUtils} - no org.jibx.runtime.* import should remain. */ class RdfDeserializerImpl implements RdfDeserializer { - private final UnmarshallingContextWrapper unmarshallingContext = new UnmarshallingContextWrapper(); + private static final String OEMBED_NAMESPACE = "https://oembed.com/"; + private static final String XPATH_HAS_SERVICE = + "svcs:has_service/@rdf:resource = /rdf:RDF/svcs:Service/@rdf:about" + + " and /rdf:RDF/svcs:Service/dcterms:conformsTo/@rdf:resource"; + private static final String XPATH_WEB_RESOURCE = + "/rdf:RDF/edm:WebResource[" + XPATH_HAS_SERVICE + " = \"" + OEMBED_NAMESPACE + "\""; + private static final String OEMBED_XPATH_CONDITION_IS_SHOWN_BY = + EDM_IS_SHOWN_BY + "[" + EDM_IS_SHOWN_BY + " = " + XPATH_WEB_RESOURCE + "]/@rdf:about]"; + private static final String OEMBED_XPATH_CONDITION_HAS_VIEW = EDM_HAS_VIEW + + "[" + EDM_HAS_VIEW + "=" + XPATH_WEB_RESOURCE + "]/@rdf:about]"; + private final UnmarshallingContextWrapper unmarshallingContext = new UnmarshallingContextWrapper(); private final XPathExpressionWrapper getObjectExpression = new XPathExpressionWrapper( - xPath -> xPath.compile("/rdf:RDF/ore:Aggregation/edm:object/@rdf:resource")); + xPath -> xPath.compile(EDM_OBJECT)); private final XPathExpressionWrapper getHasViewExpression = new XPathExpressionWrapper( - xPath -> xPath.compile("/rdf:RDF/ore:Aggregation/edm:hasView/@rdf:resource")); + xPath -> xPath.compile(EDM_HAS_VIEW + " | " + OEMBED_XPATH_CONDITION_HAS_VIEW)); private final XPathExpressionWrapper getIsShownAtExpression = new XPathExpressionWrapper( - xPath -> xPath.compile("/rdf:RDF/ore:Aggregation/edm:isShownAt/@rdf:resource")); + xPath -> xPath.compile(EDM_IS_SHOWN_AT)); private final XPathExpressionWrapper getIsShownByExpression = new XPathExpressionWrapper( - xPath -> xPath.compile("/rdf:RDF/ore:Aggregation/edm:isShownBy/@rdf:resource")); + xPath -> xPath.compile(EDM_IS_SHOWN_BY + " | " + 
OEMBED_XPATH_CONDITION_IS_SHOWN_BY)); - private static class XPathExpressionWrapper extends - AbstractThreadSafeWrapper { - - XPathExpressionWrapper( - ThrowingFunction expressionCreator) { - super(() -> { - final XPathFactory factory; - synchronized (XPathFactory.class) { - factory = XPathFactory.newInstance(); - } - final XPath xPath = factory.newXPath(); - xPath.setNamespaceContext(new RdfNamespaceContext()); - try { - return expressionCreator.apply(xPath); - } catch (XPathExpressionException e) { - throw new RdfDeserializationException("Could not initialize xpath expression.", e); - } - }); - } - - NodeList evaluate(Document document) throws RdfDeserializationException { - return process(compiledExpression -> { - try { - return (NodeList) compiledExpression.evaluate(document, XPathConstants.NODESET); - } catch (XPathExpressionException e) { - throw new RdfDeserializationException("Problem with deserializing RDF.", e); - } - }); - } + private static List convertToResourceEntries( + Map> urlWithTypes) { + return urlWithTypes.entrySet().stream().map(RdfDeserializerImpl::convertToResourceEntry) + .toList(); } - private static class UnmarshallingContextWrapper extends - AbstractThreadSafeWrapper { - - public UnmarshallingContextWrapper() { - super(() -> { - try { - return RdfBindingFactoryProvider.getBindingFactory().createUnmarshallingContext(); - } catch (JiBXException e) { - throw new RdfDeserializationException("Problem creating deserializer.", e); - } - }); - } + private static RdfResourceEntry convertToResourceEntry(Map.Entry> entry) { + return new RdfResourceEntry(entry.getKey(), entry.getValue()); + } - public RDF deserializeToRdf(InputStream inputStream) throws RdfDeserializationException { - return process(context -> { - try { - return (RDF) context.unmarshalDocument(inputStream, "UTF-8"); - } catch (JiBXException e) { - throw new RdfDeserializationException("Problem with deserializing record to RDF.", e); - } - }); + private static R 
performDeserialization(byte[] input, DeserializationOperation operation) + throws RdfDeserializationException { + try (InputStream inputStream = new ByteArrayInputStream(input)) { + return operation.performDeserialization(inputStream); + } catch (IOException e) { + throw new RdfDeserializationException("Problem with reading byte array - Shouldn't happen.", e); } } @Override public RdfResourceEntry getMainThumbnailResourceForMediaExtraction(byte[] input) - throws RdfDeserializationException { + throws RdfDeserializationException { return performDeserialization(input, this::getMainThumbnailResourceForMediaExtraction); } @Override public RdfResourceEntry getMainThumbnailResourceForMediaExtraction(InputStream inputStream) - throws RdfDeserializationException { + throws RdfDeserializationException { return getMainThumbnailResourceForMediaExtraction(deserializeToDocument(inputStream)) - .orElse(null); - } - - private Optional getMainThumbnailResourceForMediaExtraction(Document record) - throws RdfDeserializationException { - - // Get the entries of the required types. - final Map> resourceEntries = getResourceEntries(record, - Collections.singleton(UrlType.URL_TYPE_FOR_MAIN_THUMBNAIL_RESOURCE)); - - // If there is not exactly one, we return an empty optional. - if (resourceEntries.size() != 1) { - return Optional.empty(); - } - - // So there is exactly one. Convert and return. - return Optional.of(convertToResourceEntries(resourceEntries).getFirst()); + .orElse(null); } @Override public List getRemainingResourcesForMediaExtraction(byte[] input) - throws RdfDeserializationException { + throws RdfDeserializationException { return performDeserialization(input, this::getRemainingResourcesForMediaExtraction); } @Override public List getRemainingResourcesForMediaExtraction(InputStream inputStream) - throws RdfDeserializationException { + throws RdfDeserializationException { // Get all the resource entries. 
- final Document record = deserializeToDocument(inputStream); - final Map> allResources = getResourceEntries(record, - UrlType.URL_TYPES_FOR_MEDIA_EXTRACTION); + final Document deserializedDocument = deserializeToDocument(inputStream); + final Map> allResources = getResourceEntries(deserializedDocument, + UrlType.URL_TYPES_FOR_MEDIA_EXTRACTION); // Find the main thumbnail resource if it exists and remove it from the result. - getMainThumbnailResourceForMediaExtraction(record).map(RdfResourceEntry::getResourceUrl) - .ifPresent(allResources::remove); + getMainThumbnailResourceForMediaExtraction(deserializedDocument).map(RdfResourceEntry::getResourceUrl) + .ifPresent(allResources::remove); // Done. return convertToResourceEntries(allResources); @@ -172,46 +134,51 @@ public List getResourceEntriesForLinkChecking(byte[] input) public List getResourceEntriesForLinkChecking(InputStream inputStream) throws RdfDeserializationException { return convertToResourceEntries(getResourceEntries(deserializeToDocument(inputStream), - UrlType.URL_TYPES_FOR_LINK_CHECKING)); + UrlType.URL_TYPES_FOR_LINK_CHECKING)); } - private static List convertToResourceEntries( - Map> urlWithTypes) { - return urlWithTypes.entrySet().stream().map(RdfDeserializerImpl::convertToResourceEntry) - .toList(); + @Override + public EnrichedRdf getRdfForResourceEnriching(byte[] input) throws RdfDeserializationException { + return performDeserialization(input, this::getRdfForResourceEnriching); } - private static RdfResourceEntry convertToResourceEntry(Map.Entry> entry) { - return new RdfResourceEntry(entry.getKey(), entry.getValue()); + @Override + public EnrichedRdf getRdfForResourceEnriching(InputStream inputStream) + throws RdfDeserializationException { + return new EnrichedRdfImpl(unmarshallingContext.deserializeToRdf(inputStream)); } - Map> getResourceEntries(Document document, - Set allowedUrlTypes) throws RdfDeserializationException { - final Map> urls = new HashMap<>(); - for (UrlType type : 
allowedUrlTypes) { - final Set urlsForType = getUrls(document, type); - for (String url : urlsForType) { - urls.computeIfAbsent(url, k -> new HashSet<>()).add(type); - } + private Optional getMainThumbnailResourceForMediaExtraction(Document document) + throws RdfDeserializationException { + + // Get the entries of the required types. + final Map> resourceEntries = getResourceEntries(document, + Collections.singleton(UrlType.URL_TYPE_FOR_MAIN_THUMBNAIL_RESOURCE)); + + // If there is not exactly one, we return an empty optional. + if (resourceEntries.size() != 1) { + return Optional.empty(); } - return urls; + + // So there is exactly one. Convert and return. + return Optional.of(convertToResourceEntries(resourceEntries).get(0)); } private Set getUrls(Document document, UrlType type) throws RdfDeserializationException { // Determine the right expression to apply. final XPathExpressionWrapper expression = - switch (type) { - case OBJECT -> getObjectExpression; - case HAS_VIEW -> getHasViewExpression; - case IS_SHOWN_AT -> getIsShownAtExpression; - case IS_SHOWN_BY -> getIsShownByExpression; - }; + switch (type) { + case OBJECT -> getObjectExpression; + case HAS_VIEW -> getHasViewExpression; + case IS_SHOWN_AT -> getIsShownAtExpression; + case IS_SHOWN_BY -> getIsShownByExpression; + }; // Evaluate the expression and convert the node list to a set of attribute values. 
final NodeList nodes = expression.evaluate(document); return IntStream.range(0, nodes.getLength()).mapToObj(nodes::item).map(Node::getNodeValue) - .collect(Collectors.toSet()); + .collect(Collectors.toSet()); } private Document deserializeToDocument(InputStream inputStream) throws RdfDeserializationException { @@ -230,29 +197,113 @@ private Document deserializeToDocument(InputStream inputStream) throws RdfDeseri } } - @Override - public EnrichedRdf getRdfForResourceEnriching(byte[] input) throws RdfDeserializationException { - return performDeserialization(input, this::getRdfForResourceEnriching); - } + @FunctionalInterface + private interface DeserializationOperation { - @Override - public EnrichedRdf getRdfForResourceEnriching(InputStream inputStream) - throws RdfDeserializationException { - return new EnrichedRdfImpl(unmarshallingContext.deserializeToRdf(inputStream)); + /** + * Perform deserialization r. + * + * @param inputStream the input stream + * @return the r + * @throws RdfDeserializationException the rdf deserialization exception + */ + R performDeserialization(InputStream inputStream) throws RdfDeserializationException; } - private static R performDeserialization(byte[] input, DeserializationOperation operation) - throws RdfDeserializationException { - try (InputStream inputStream = new ByteArrayInputStream(input)) { - return operation.performDeserialization(inputStream); - } catch (IOException e) { - throw new RdfDeserializationException("Problem with reading byte array - Shouldn't happen.", e); + private static class XPathExpressionWrapper extends + AbstractThreadSafeWrapper { + + /** + * Instantiates a new X path expression wrapper. 
+ * + * @param expressionCreator the expression creator + */ + XPathExpressionWrapper( + ThrowingFunction expressionCreator) { + super(() -> { + final XPathFactory factory; + synchronized (XPathFactory.class) { + factory = XPathFactory.newInstance(); + } + final XPath xPath = factory.newXPath(); + xPath.setNamespaceContext(new RdfNamespaceContext()); + try { + return expressionCreator.apply(xPath); + } catch (XPathExpressionException e) { + throw new RdfDeserializationException("Could not initialize xpath expression.", e); + } + }); + } + + /** + * Evaluate node list. + * + * @param document the document + * @return the node list + * @throws RdfDeserializationException the rdf deserialization exception + */ + NodeList evaluate(Document document) throws RdfDeserializationException { + return process(compiledExpression -> { + try { + return (NodeList) compiledExpression.evaluate(document, XPathConstants.NODESET); + } catch (XPathExpressionException e) { + throw new RdfDeserializationException("Problem with deserializing RDF.", e); + } + }); } } - @FunctionalInterface - private interface DeserializationOperation { + private static class UnmarshallingContextWrapper extends + AbstractThreadSafeWrapper { - R performDeserialization(InputStream inputStream) throws RdfDeserializationException; + /** + * Instantiates a new Unmarshalling context wrapper. + */ + public UnmarshallingContextWrapper() { + super(() -> { + try { + return RdfBindingFactoryProvider.getBindingFactory().createUnmarshallingContext(); + } catch (JiBXException e) { + throw new RdfDeserializationException("Problem creating deserializer.", e); + } + }); + } + + /** + * Deserialize to rdf rdf. 
+ * + * @param inputStream the input stream + * @return the rdf + * @throws RdfDeserializationException the rdf deserialization exception + */ + public RDF deserializeToRdf(InputStream inputStream) throws RdfDeserializationException { + return process(context -> { + try { + return (RDF) context.unmarshalDocument(inputStream, "UTF-8"); + } catch (JiBXException e) { + throw new RdfDeserializationException("Problem with deserializing record to RDF.", e); + } + }); + } + } + + /** + * Gets resource entries. + * + * @param document the document + * @param allowedUrlTypes the allowed url types + * @return the resource entries + * @throws RdfDeserializationException the rdf deserialization exception + */ + Map> getResourceEntries(Document document, + Set allowedUrlTypes) throws RdfDeserializationException { + final Map> urls = new HashMap<>(); + for (UrlType type : allowedUrlTypes) { + final Set urlsForType = getUrls(document, type); + for (String url : urlsForType) { + urls.computeIfAbsent(url, k -> new HashSet<>()).add(type); + } + } + return urls; } } diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfXpathConstants.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfXpathConstants.java new file mode 100644 index 000000000..05bbd1654 --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/RdfXpathConstants.java @@ -0,0 +1,17 @@ +package eu.europeana.metis.mediaprocessing; + +/** + * Rdf xpath string constants. 
+ */ +public final class RdfXpathConstants { + + public static final String RDF_NAMESPACE = "/rdf:RDF"; + public static final String ORE_AGGREGATION = RDF_NAMESPACE + "/ore:Aggregation"; + public static final String EDM_OBJECT = ORE_AGGREGATION + "/edm:object/@rdf:resource"; + public static final String EDM_IS_SHOWN_BY = ORE_AGGREGATION + "/edm:isShownBy/@rdf:resource"; + public static final String EDM_HAS_VIEW = ORE_AGGREGATION + "/edm:hasView/@rdf:resource"; + public static final String EDM_IS_SHOWN_AT = ORE_AGGREGATION + "/edm:isShownAt/@rdf:resource"; + + private RdfXpathConstants() {} + +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessor.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessor.java index df541dede..b6d82d3c0 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessor.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessor.java @@ -48,10 +48,13 @@ class AudioVideoProcessor implements MediaProcessor { private static final Logger LOGGER = LoggerFactory.getLogger(AudioVideoProcessor.class); + public static final int FFPROBE_MAX_VERSION = 7; + public static final int FFPROBE_MIN_VERSION = 2; private static String globalFfprobeCommand; private final CommandExecutor commandExecutor; + private final String ffprobeCommand; /** @@ -97,7 +100,7 @@ static String discoverFfprobeCommand(CommandExecutor commandExecutor) int indexVersion = output.lastIndexOf("version ") + "version ".length(); int version = Character.isDigit(output.charAt(indexVersion)) ? 
Integer.parseInt(String.valueOf(output.charAt(indexVersion))) : 0; - if (!(version >= 2 && version < 7)) { + if (!(version >= FFPROBE_MIN_VERSION && version < FFPROBE_MAX_VERSION)) { throw new MediaProcessorException("ffprobe version " + version + ".x not found"); } diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/Media3dProcessor.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/Media3dProcessor.java index a799ea083..2367946dd 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/Media3dProcessor.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/Media3dProcessor.java @@ -29,4 +29,5 @@ public ResourceExtractionResult copyMetadata(Resource resource, String detectedM public boolean downloadResourceForFullProcessing() { return false; } + } diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImpl.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImpl.java index 998d67ef3..35d116ec4 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImpl.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImpl.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; +import java.util.List; import java.util.Optional; import java.util.Set; import org.apache.tika.io.TikaInputStream; @@ -51,6 +52,7 @@ enum ProcessingMode {FULL, REDUCED, NONE} private final AudioVideoProcessor audioVideoProcessor; private final TextProcessor textProcessor; private final Media3dProcessor media3dProcessor; + private final OEmbedProcessor oEmbedProcessor; /** * Constructor meant for testing purposes. 
@@ -58,20 +60,19 @@ enum ProcessingMode {FULL, REDUCED, NONE} * @param resourceDownloadClient The download client for resources. * @param mimeTypeDetectHttpClient The mime type detector for URLs. * @param tika A tika instance. - * @param imageProcessor An image processor. - * @param audioVideoProcessor An audio/video processor. - * @param textProcessor A text processor. + * @param mediaProcessorList the media processor list */ MediaExtractorImpl(ResourceDownloadClient resourceDownloadClient, - MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika, ImageProcessor imageProcessor, - AudioVideoProcessor audioVideoProcessor, TextProcessor textProcessor, Media3dProcessor media3dProcessor) { + MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika, + List mediaProcessorList) { this.resourceDownloadClient = resourceDownloadClient; this.mimeTypeDetectHttpClient = mimeTypeDetectHttpClient; this.tika = tika; - this.imageProcessor = imageProcessor; - this.audioVideoProcessor = audioVideoProcessor; - this.textProcessor = textProcessor; - this.media3dProcessor = media3dProcessor; + this.imageProcessor = (ImageProcessor) getMediaProcessor(mediaProcessorList, ImageProcessor.class); + this.audioVideoProcessor = (AudioVideoProcessor) getMediaProcessor(mediaProcessorList, AudioVideoProcessor.class); + this.textProcessor = (TextProcessor) getMediaProcessor(mediaProcessorList, TextProcessor.class); + this.media3dProcessor = (Media3dProcessor) getMediaProcessor(mediaProcessorList, Media3dProcessor.class); + this.oEmbedProcessor = (OEmbedProcessor) getMediaProcessor(mediaProcessorList, OEmbedProcessor.class); } /** @@ -102,6 +103,16 @@ public MediaExtractorImpl(int redirectCount, int thumbnailGenerateTimeout, this.textProcessor = new TextProcessor(thumbnailGenerator, new PdfToImageConverter(new CommandExecutor(thumbnailGenerateTimeout))); this.media3dProcessor = new Media3dProcessor(); + this.oEmbedProcessor = new OEmbedProcessor(); + } + + private Object 
getMediaProcessor(List mediaProcessorList, Class type) { + for (Object mediaProcessor : mediaProcessorList) { + if (type.isInstance(mediaProcessor)) { + return type.cast(mediaProcessor); + } + } + return null; } @Override @@ -193,10 +204,10 @@ String detectType(Path path, String providedMimeType) throws IOException { } } - MediaProcessor chooseMediaProcessor(MediaType mediaType) { + MediaProcessor chooseMediaProcessor(MediaType mediaType, String detectedMimeType) { final MediaProcessor processor; switch (mediaType) { - case TEXT -> processor = textProcessor; + case TEXT, OTHER -> processor = chooseByDetectedMimeType(mediaType, detectedMimeType); case AUDIO, VIDEO -> processor = audioVideoProcessor; case IMAGE -> processor = imageProcessor; case THREE_D -> processor = media3dProcessor; @@ -205,6 +216,20 @@ MediaProcessor chooseMediaProcessor(MediaType mediaType) { return processor; } + MediaProcessor chooseByDetectedMimeType(MediaType mediaType, String detectedMimeType) { + if (detectedMimeType == null) { + return null; + } else if ((mediaType == MediaType.TEXT || mediaType == MediaType.OTHER) && + (detectedMimeType.startsWith("text/xml") || detectedMimeType.startsWith("application/xml") + || detectedMimeType.startsWith("application/json"))) { + return oEmbedProcessor; + } else if (mediaType == MediaType.TEXT) { + return textProcessor; + } else { + return null; + } + } + void verifyAndCorrectContentAvailability(Resource resource, ProcessingMode mode, String detectedMimeType) throws MediaExtractionException, IOException { @@ -255,19 +280,32 @@ ResourceExtractionResult performProcessing(Resource resource, ProcessingMode mod } // Choose the right media processor. - final MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType)); + MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType), detectedMimeType); - // Process the resource depending on the mode. 
- final ResourceExtractionResult result; + ResourceExtractionResult result; if (processor == null) { result = null; - } else if (mode == ProcessingMode.FULL) { + } else { + result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType); + } + // No oEmbed detected try with text processing + if (processor instanceof OEmbedProcessor && result == null) { + processor = textProcessor; + result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType); + } + // Done + return result; + } + + private static ResourceExtractionResult getResourceExtractionResult(Resource resource, ProcessingMode mode, + boolean mainThumbnailAvailable, MediaProcessor processor, String detectedMimeType) throws MediaExtractionException { + ResourceExtractionResult result; + // Process the resource depending on the mode. + if (mode == ProcessingMode.FULL) { result = processor.extractMetadata(resource, detectedMimeType, mainThumbnailAvailable); } else { result = processor.copyMetadata(resource, detectedMimeType); } - - // Done return result; } @@ -281,7 +319,7 @@ public void close() throws IOException { * @return true if and only if resources of the given type need to be downloaded before performing full processing. 
*/ boolean shouldDownloadForFullProcessing(String mimeType) { - return Optional.of(MediaType.getMediaType(mimeType)).map(this::chooseMediaProcessor) + return Optional.of(MediaType.getMediaType(mimeType)).map(mediaType -> chooseMediaProcessor(mediaType, mimeType)) .map(MediaProcessor::downloadResourceForFullProcessing).orElse(Boolean.FALSE); } } diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessor.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessor.java new file mode 100644 index 000000000..90f10310c --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessor.java @@ -0,0 +1,123 @@ +package eu.europeana.metis.mediaprocessing.extraction; + +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.checkValidWidthAndHeightDimensions; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getDurationFromModel; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromJson; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromXml; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo; + +import eu.europeana.metis.mediaprocessing.exception.MediaExtractionException; +import eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedModel; +import eu.europeana.metis.mediaprocessing.model.ImageResourceMetadata; +import eu.europeana.metis.mediaprocessing.model.Resource; +import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResult; +import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResultImpl; +import eu.europeana.metis.mediaprocessing.model.VideoResourceMetadata; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Locale; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The type Oembed processor. + */ +public class OEmbedProcessor implements MediaProcessor { + + /** + * The constant LOGGER. + */ + private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedProcessor.class); + + /** + * Process a resource by extracting the metadata from the content. + * + * @param resource The resource to process. Note that the resource may not have content (see + * {@link MediaExtractorImpl#shouldDownloadForFullProcessing(String)}). + * @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided + * by the server and which is stored in {@link Resource#getProvidedMimeType()}). + * @param mainThumbnailAvailable Whether the main thumbnail for this record is available. This may influence the decision on + * whether to generate a thumbnail for this resource. + * @return The result of the processing. + * @throws MediaExtractionException In case something went wrong during the extraction. 
+   */
+  @Override
+  public ResourceExtractionResult extractMetadata(Resource resource, String detectedMimeType, boolean mainThumbnailAvailable)
+      throws MediaExtractionException {
+    ResourceExtractionResult resourceExtractionResult;
+    // the content for this oembed needs to be downloaded to be examined
+    if (resource.getContentPath() != null) {
+      try {
+        OEmbedModel embedModel = null;
+        if (detectedMimeType.startsWith("application/json")) {
+          embedModel = getOEmbedModelFromJson(Files.readAllBytes(Paths.get(resource.getContentPath().toString())));
+        } else if (detectedMimeType.startsWith("application/xml") || detectedMimeType.startsWith("text/xml")) {
+          // text/xml is the mime type the oEmbed specification prescribes for XML responses
+          embedModel = getOEmbedModelFromXml(Files.readAllBytes(Paths.get(resource.getContentPath().toString())));
+        }
+        if (isValidOEmbedPhotoOrVideo(embedModel)) {
+          checkValidWidthAndHeightDimensions(embedModel, resource.getResourceUrl());
+          resourceExtractionResult = getResourceExtractionResult(resource, detectedMimeType, embedModel);
+        } else {
+          LOGGER.warn("No oembed model found");
+          resourceExtractionResult = null;
+        }
+      } catch (IOException e) {
+        throw new MediaExtractionException("Unable to read OEmbedded resource", e);
+      }
+    } else {
+      resourceExtractionResult = null;
+    }
+
+    return resourceExtractionResult;
+  }
+
+  /**
+   * Process a resource by copying the metadata from the input without performing any extraction.
+   *
+   * @param resource The resource to process. The resource is not expected to have content.
+   * @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided
+   * by the server and which is stored in {@link Resource#getProvidedMimeType()}).
+   * @return The result of the processing.
+   * @throws MediaExtractionException In case something went wrong during the extraction.
+ */ + @Override + public ResourceExtractionResult copyMetadata(Resource resource, String detectedMimeType) throws MediaExtractionException { + return null; + } + + /** + * @return Whether the processor needs the downloaded resource for full processing. + */ + @Override + public boolean downloadResourceForFullProcessing() { + return true; + } + + private ResourceExtractionResult getResourceExtractionResult(Resource resource, String detectedMimeType, + OEmbedModel oEmbedModel) throws MediaExtractionException { + ResourceExtractionResult resourceExtractionResult; + if (oEmbedModel != null) { + switch (oEmbedModel.getType().toLowerCase(Locale.US)) { + case "photo" -> { + ImageResourceMetadata imageResourceMetadata = new ImageResourceMetadata(detectedMimeType, + resource.getResourceUrl(), + resource.getProvidedFileSize(), oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null, null); + resourceExtractionResult = new ResourceExtractionResultImpl(imageResourceMetadata); + } + case "video" -> { + Double duration = getDurationFromModel(oEmbedModel); + VideoResourceMetadata videoResourceMetadata = new VideoResourceMetadata(detectedMimeType, + resource.getResourceUrl(), + resource.getProvidedFileSize(), duration, null, oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null); + resourceExtractionResult = new ResourceExtractionResultImpl(videoResourceMetadata); + } + default -> resourceExtractionResult = null; + } + } else { + resourceExtractionResult = null; + } + return resourceExtractionResult; + } +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModel.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModel.java new file mode 100644 index 000000000..d612029ae --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModel.java @@ -0,0 +1,362 @@ +package 
eu.europeana.metis.mediaprocessing.extraction.oembed; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Objects; + +/** + * Model based on the standard https://oembed.com/ + */ +public class OEmbedModel { + + private String type; + private String version; + private String title; + private int height; + private int width; + private String url; + @JsonProperty("author_name") + private String authorName; + @JsonProperty("author_url") + private String authorUrl; + @JsonProperty("provider_name") + private String providerName; + @JsonProperty("provider_url") + private String providerUrl; + @JsonProperty("cache_age") + private String cacheAge; + @JsonProperty("thumbnail_url") + private String thumbnailUrl; + @JsonProperty("thumbnail_height") + private String thumbnailHeight; + @JsonProperty("thumbnail_width") + private String thumbnailWidth; + private String html; + private String duration; + + /** + * Gets type. + * + * @return the type + */ + public String getType() { + return type; + } + + /** + * Sets type. + * + * @param type the type + */ + public void setType(String type) { + this.type = type; + } + + /** + * Gets version. + * + * @return the version + */ + public String getVersion() { + return version; + } + + /** + * Sets version. + * + * @param version the version + */ + public void setVersion(String version) { + this.version = version; + } + + /** + * Gets title. + * + * @return the title + */ + public String getTitle() { + return title; + } + + /** + * Sets title. + * + * @param title the title + */ + public void setTitle(String title) { + this.title = title; + } + + /** + * Gets height. + * + * @return the height + */ + public int getHeight() { + return height; + } + + /** + * Sets height. + * + * @param height the height + */ + public void setHeight(int height) { + this.height = height; + } + + /** + * Gets width. + * + * @return the width + */ + public int getWidth() { + return width; + } + + /** + * Sets width. 
+ * + * @param width the width + */ + public void setWidth(int width) { + this.width = width; + } + + /** + * Gets url. + * + * @return the url + */ + public String getUrl() { + return url; + } + + /** + * Sets url. + * + * @param url the url + */ + public void setUrl(String url) { + this.url = url; + } + + /** + * Gets author name. + * + * @return the author name + */ + public String getAuthorName() { + return authorName; + } + + /** + * Sets author name. + * + * @param authorName the author name + */ + public void setAuthorName(String authorName) { + this.authorName = authorName; + } + + /** + * Gets author url. + * + * @return the author url + */ + public String getAuthorUrl() { + return authorUrl; + } + + /** + * Sets author url. + * + * @param authorUrl the author url + */ + public void setAuthorUrl(String authorUrl) { + this.authorUrl = authorUrl; + } + + /** + * Gets provider name. + * + * @return the provider name + */ + public String getProviderName() { + return providerName; + } + + /** + * Sets provider name. + * + * @param providerName the provider name + */ + public void setProviderName(String providerName) { + this.providerName = providerName; + } + + /** + * Gets provider url. + * + * @return the provider url + */ + public String getProviderUrl() { + return providerUrl; + } + + /** + * Sets provider url. + * + * @param providerUrl the provider url + */ + public void setProviderUrl(String providerUrl) { + this.providerUrl = providerUrl; + } + + /** + * Gets cache age. + * + * @return the cache age + */ + public String getCacheAge() { + return cacheAge; + } + + /** + * Sets cache age. + * + * @param cacheAge the cache age + */ + public void setCacheAge(String cacheAge) { + this.cacheAge = cacheAge; + } + + /** + * Gets thumbnail url. + * + * @return the thumbnail url + */ + public String getThumbnailUrl() { + return thumbnailUrl; + } + + /** + * Sets thumbnail url. 
+ * + * @param thumbnailUrl the thumbnail url + */ + public void setThumbnailUrl(String thumbnailUrl) { + this.thumbnailUrl = thumbnailUrl; + } + + /** + * Gets thumbnail height. + * + * @return the thumbnail height + */ + public String getThumbnailHeight() { + return thumbnailHeight; + } + + /** + * Sets thumbnail height. + * + * @param thumbnailHeight the thumbnail height + */ + public void setThumbnailHeight(String thumbnailHeight) { + this.thumbnailHeight = thumbnailHeight; + } + + /** + * Gets thumbnail width. + * + * @return the thumbnail width + */ + public String getThumbnailWidth() { + return thumbnailWidth; + } + + /** + * Sets thumbnail width. + * + * @param thumbnailWidth the thumbnail width + */ + public void setThumbnailWidth(String thumbnailWidth) { + this.thumbnailWidth = thumbnailWidth; + } + + /** + * Gets html. + * + * @return the html + */ + public String getHtml() { + return html; + } + + /** + * Sets html. + * + * @param html the html + */ + public void setHtml(String html) { + this.html = html; + } + + /** + * Gets duration. + * + * @return the duration + */ + public String getDuration() { + return duration; + } + + /** + * Sets duration. 
+ * + * @param duration the duration + */ + public void setDuration(String duration) { + this.duration = duration; + } + + @Override + public final boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof OEmbedModel that)) { + return false; + } + + return height == that.height && width == that.width && type.equals(that.type) && version.equals(that.version) + && Objects.equals(title, that.title) && url.equals(that.url) && Objects.equals(authorName, + that.authorName) && Objects.equals(authorUrl, that.authorUrl) && Objects.equals(providerName, + that.providerName) && Objects.equals(providerUrl, that.providerUrl) && Objects.equals(cacheAge, + that.cacheAge) && Objects.equals(thumbnailUrl, that.thumbnailUrl) && Objects.equals(thumbnailHeight, + that.thumbnailHeight) && Objects.equals(thumbnailWidth, that.thumbnailWidth) && Objects.equals(html, + that.html) && Objects.equals(duration, that.duration); + } + + @Override + public int hashCode() { + int result = type.hashCode(); + result = 31 * result + version.hashCode(); + result = 31 * result + Objects.hashCode(title); + result = 31 * result + height; + result = 31 * result + width; + result = 31 * result + url.hashCode(); + result = 31 * result + Objects.hashCode(authorName); + result = 31 * result + Objects.hashCode(authorUrl); + result = 31 * result + Objects.hashCode(providerName); + result = 31 * result + Objects.hashCode(providerUrl); + result = 31 * result + Objects.hashCode(cacheAge); + result = 31 * result + Objects.hashCode(thumbnailUrl); + result = 31 * result + Objects.hashCode(thumbnailHeight); + result = 31 * result + Objects.hashCode(thumbnailWidth); + result = 31 * result + Objects.hashCode(html); + result = 31 * result + Objects.hashCode(duration); + return result; + } +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedValidation.java 
b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedValidation.java new file mode 100644 index 000000000..cbeace77d --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedValidation.java @@ -0,0 +1,387 @@ +package eu.europeana.metis.mediaprocessing.extraction.oembed; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.web.util.UriComponentsBuilder; + +/** + * The type oEmbed validation methods. + */ +public final class OEmbedValidation { + + private static final String MAX_HEIGHT = "maxheight"; + private static final String MAX_WIDTH = "maxwidth"; + private static final String INVALID_URL = "Invalid url"; + private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedValidation.class); + + private OEmbedValidation() { + // validations class + } + + /** + * Gets oembed model from json. + * + * @param jsonResource byte[] + * @return the oembed model from json + * @throws IOException the io exception + */ + public static OEmbedModel getOEmbedModelFromJson(byte[] jsonResource) throws IOException { + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + return objectMapper.readValue(jsonResource, OEmbedModel.class); + } + + /** + * Gets oembed model from xml. 
+ * + * @param xmlResource byte[] + * @return the oembed model from xml + * @throws IOException the io exception + */ + public static OEmbedModel getOEmbedModelFromXml(byte[] xmlResource) throws IOException { + XmlMapper xmlMapper = new XmlMapper(); + xmlMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + return xmlMapper.readValue(xmlResource, OEmbedModel.class); + } + + /** + * Is valid oembed photo or video boolean. + * + * @param oEmbedModel the oembed model + * @return the boolean true complies the minimum required fields for each type + */ + public static boolean isValidOEmbedPhotoOrVideo(OEmbedModel oEmbedModel) { + return hasValidVersion(oEmbedModel) && hasValidType(oEmbedModel); + } + + /** + * Has valid height size url boolean. + * + * @param oEmbedModel the oEmbed model + * @param url the url + * @return the boolean + */ + public static boolean hasValidHeightSizeUrl(OEmbedModel oEmbedModel, String url) { + boolean result = false; + Map params; + if (oEmbedModel != null) { + try { + params = UriComponentsBuilder.fromUri(new URI(url)) + .build() + .getQueryParams() + .toSingleValueMap(); + + if (containsMaxHeightAndMaxWidth(params) && hasValidMaxHeight(params) + && isOEmbedValidHeight(oEmbedModel, params)) { + result = true; + } else { + LOGGER.warn("Not valid height according to max height"); + } + } catch (URISyntaxException e) { + LOGGER.warn(INVALID_URL, e); + } catch (NumberFormatException e) { + LOGGER.warn("Not valid height dimension size", e); + } + } + return result; + } + + /** + * Has valid height size thumbnail boolean. + * + * @param oEmbedModel the oEmbed model + * @param url the url + * @return the boolean + */ + public static boolean hasValidHeightSizeThumbnail(OEmbedModel oEmbedModel, String url) { + boolean result = false; + Map params; + if (oEmbedModel != null) { + try { + params = UriComponentsBuilder.fromUri(new URI(url)). 
+ build() + .getQueryParams() + .toSingleValueMap(); + + if (containsMaxHeightAndMaxWidth(params) && hasValidMaxHeight(params) + && hasThumbnailUrl(oEmbedModel) && isOEmbedValidThumbnailHeight(oEmbedModel, params)) { + result = true; + } else { + LOGGER.warn("Not valid thumbnail size for max height parameter"); + } + } catch (URISyntaxException e) { + LOGGER.warn(INVALID_URL, e); + } catch (NumberFormatException e) { + LOGGER.warn("Not valid height thumbnail dimension size", e); + } + } + return result; + } + + /** + * Has valid width size url boolean. + * + * @param oEmbedModel the oEmbed model + * @param url the url + * @return the boolean + */ + public static boolean hasValidWidthSizeUrl(OEmbedModel oEmbedModel, String url) { + boolean result = false; + Map params; + if (oEmbedModel != null) { + try { + params = UriComponentsBuilder.fromUri(new URI(url)). + build() + .getQueryParams() + .toSingleValueMap(); + + if (containsMaxHeightAndMaxWidth(params) && hasValidMaxWidth(params) + && isOEmbedValidWidth(oEmbedModel, params)) { + result = true; + } else { + LOGGER.warn("Not valid width according to max width"); + } + } catch (URISyntaxException e) { + LOGGER.warn(INVALID_URL, e); + } catch (NumberFormatException e) { + LOGGER.warn("Not valid width dimension size", e); + } + } + return result; + } + + /** + * Has valid width size thumbnail boolean. + * + * @param oEmbedModel the oEmbed model + * @param url the url + * @return the boolean + */ + public static boolean hasValidWidthSizeThumbnail(OEmbedModel oEmbedModel, String url) { + boolean result = false; + Map params; + if (oEmbedModel != null) { + try { + params = UriComponentsBuilder.fromUri(new URI(url)). 
+ build() + .getQueryParams() + .toSingleValueMap(); + + if (containsMaxHeightAndMaxWidth(params) && hasValidMaxWidth(params) + && hasThumbnailUrl(oEmbedModel) && isOEmbedValidThumbnailWidth(oEmbedModel, params)) { + result = true; + } else { + LOGGER.warn("Not valid thumbnail size for max width parameter"); + } + } catch (URISyntaxException e) { + LOGGER.warn("Invalid url ", e); + } catch (NumberFormatException e) { + LOGGER.warn("Not valid thumbnail width dimension size", e); + } + } + return result; + } + + /** + * Check valid width and height dimensions. + * + * @param oEmbedModel the oEmbed model + * @param url the url + */ + public static void checkValidWidthAndHeightDimensions(OEmbedModel oEmbedModel, String url) { + if (hasValidHeightSizeUrl(oEmbedModel, url)) { + LOGGER.info("Valid url dimensions of height"); + } else { + LOGGER.warn("Not valid url dimensions of height"); + } + if (hasValidWidthSizeUrl(oEmbedModel, url)) { + LOGGER.info("Valid url dimensions of width"); + } else { + LOGGER.warn("Not valid url dimensions of width"); + } + if (hasValidHeightSizeThumbnail(oEmbedModel, url)) { + LOGGER.info("Valid thumbnail dimensions of height"); + } else { + LOGGER.warn("Not valid thumbnail dimensions of height"); + } + if (hasValidWidthSizeThumbnail(oEmbedModel, url)) { + LOGGER.info("Valid thumbnail dimensions of width"); + } else { + LOGGER.warn("Not valid thumbnail dimensions of width"); + } + } + + /** + * Gets duration from model. + * + * @param oEmbedModel the oEmbed model + * @return the duration from model + */ + public static double getDurationFromModel(OEmbedModel oEmbedModel) { + double duration; + try { + duration = Double.parseDouble(oEmbedModel.getDuration()); + } catch (NumberFormatException e) { + duration = 0.0; + } + return duration; + } + + /** + * Is oEmbed valid thumbnail height boolean. 
+ * + * @param oEmbedModel the oEmbed model + * @param params the params + * @return the boolean + */ + private static boolean isOEmbedValidThumbnailHeight(OEmbedModel oEmbedModel, Map params) { + return Integer.parseInt(oEmbedModel.getThumbnailHeight()) <= Integer.parseInt(params.get(MAX_HEIGHT)); + } + + /** + * Is oEmbed valid thumbnail width boolean. + * + * @param oEmbedModel the oEmbed model + * @param params the params + * @return the boolean + */ + private static boolean isOEmbedValidThumbnailWidth(OEmbedModel oEmbedModel, Map params) { + return Integer.parseInt(oEmbedModel.getThumbnailWidth()) <= Integer.parseInt(params.get(MAX_WIDTH)); + } + + /** + * Is oEmbed valid width boolean. + * + * @param oEmbedModel the oEmbed model + * @param params the params + * @return the boolean + */ + private static boolean isOEmbedValidWidth(OEmbedModel oEmbedModel, Map params) { + return oEmbedModel.getWidth() <= Integer.parseInt(params.get(MAX_WIDTH)); + } + + /** + * Is oEmbed valid height boolean. + * + * @param oEmbedModel the oEmbed model + * @param params the params + * @return the boolean + */ + private static boolean isOEmbedValidHeight(OEmbedModel oEmbedModel, Map params) { + return oEmbedModel.getHeight() <= Integer.parseInt(params.get(MAX_HEIGHT)); + } + + /** + * Has valid max height boolean. + * + * @param params the params + * @return the boolean + */ + private static boolean hasValidMaxHeight(Map params) { + return Integer.parseInt(params.get(MAX_HEIGHT)) > 0; + } + + /** + * Has valid max width boolean. + * + * @param params the params + * @return the boolean + */ + private static boolean hasValidMaxWidth(Map params) { + return Integer.parseInt(params.get(MAX_WIDTH)) > 0; + } + + /** + * Check if params contains max height and max width boolean. 
+ * + * @param params the params + * @return the boolean + */ + private static boolean containsMaxHeightAndMaxWidth(Map params) { + return params.containsKey(MAX_HEIGHT) || params.containsKey(MAX_WIDTH); + } + + /** + * Has thumbnail url boolean. + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean hasThumbnailUrl(OEmbedModel oEmbedModel) { + return oEmbedModel.getThumbnailUrl() != null; + } + + /** + * Has valid type boolean. + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean hasValidType(OEmbedModel oEmbedModel) { + return (isValidTypePhoto(oEmbedModel) || isValidTypeVideo(oEmbedModel)); + } + + /** + * Is valid type photo boolean. + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean isValidTypePhoto(OEmbedModel oEmbedModel) { + return hasValidModelAndType(oEmbedModel) + && "photo".equalsIgnoreCase(oEmbedModel.getType()) + && oEmbedModel.getUrl() != null && !oEmbedModel.getUrl().isEmpty() + && hasValidDimensions(oEmbedModel); + } + + /** + * Is valid type video boolean. + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean isValidTypeVideo(OEmbedModel oEmbedModel) { + return hasValidModelAndType(oEmbedModel) + && "video".equalsIgnoreCase(oEmbedModel.getType()) + && oEmbedModel.getHtml() != null && !oEmbedModel.getHtml().isEmpty() + && hasValidDimensions(oEmbedModel); + } + + /** + * Has valid model and type boolean. + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean hasValidModelAndType(OEmbedModel oEmbedModel) { + return oEmbedModel != null && oEmbedModel.getType() != null; + } + + /** + * Has valid dimensions boolean. 
+ * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean hasValidDimensions(OEmbedModel oEmbedModel) { + return (oEmbedModel.getWidth() > 0 && oEmbedModel.getHeight() > 0); + } + + /** + * Has valid version boolean. private + * + * @param oEmbedModel the oEmbed model + * @return the boolean + */ + private static boolean hasValidVersion(OEmbedModel oEmbedModel) { + return oEmbedModel != null && oEmbedModel.getVersion() != null + && oEmbedModel.getVersion().startsWith("1.0"); + } +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClient.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClient.java index 98128affe..ce07d223a 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClient.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClient.java @@ -1,16 +1,17 @@ package eu.europeana.metis.mediaprocessing.http; +import static org.apache.tika.metadata.TikaCoreProperties.RESOURCE_NAME_KEY; + import eu.europeana.metis.mediaprocessing.wrappers.TikaWrapper; import eu.europeana.metis.network.AbstractHttpClient; -import org.apache.commons.lang3.StringUtils; -import org.apache.tika.Tika; -import org.apache.tika.metadata.Metadata; -import org.springframework.http.ContentDisposition; - import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URL; +import org.apache.commons.lang3.StringUtils; +import org.apache.tika.Tika; +import org.apache.tika.metadata.Metadata; +import org.springframework.http.ContentDisposition; /** * An {@link AbstractHttpClient} that tries to determine the mime type of a link. 
It does so based @@ -77,7 +78,7 @@ protected String createResult(URL providedLink, URI actualUri, ContentDispositio final Metadata metadata = new Metadata(); final String resourceName = getResourceNameFromContentDispositionOrFromActualURI(contentDisposition, actualUri); if (resourceName != null) { - metadata.set(Metadata.RESOURCE_NAME_KEY, resourceName); + metadata.set(RESOURCE_NAME_KEY, resourceName); } if (mimeType != null) { final int separatorIndex = mimeType.indexOf(';'); diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/ImageResourceMetadata.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/ImageResourceMetadata.java index f0736363b..887ed5546 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/ImageResourceMetadata.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/ImageResourceMetadata.java @@ -2,6 +2,7 @@ import eu.europeana.metis.mediaprocessing.exception.MediaExtractionException; import eu.europeana.metis.schema.jibx.ColorSpaceType; +import eu.europeana.metis.schema.jibx.EdmType; import eu.europeana.metis.schema.model.Orientation; import java.util.Collections; import java.util.List; @@ -96,6 +97,8 @@ protected void updateResource(WebResource resource) { resource.setOrientation(orientation); resource.setColorspace(colorSpace); resource.setDominantColors(getDominantColors()); + resource.setEdmType(EdmType.IMAGE); + } public Integer getWidth() { diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/RdfResourceEntry.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/RdfResourceEntry.java index 8033a45b9..8a0fda7a0 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/RdfResourceEntry.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/RdfResourceEntry.java @@ -20,6 +20,17 @@ public class 
RdfResourceEntry implements Serializable { private String resourceUrl; private Set urlTypes; + /** + * Instantiates a new Rdf resource entry. + * + * @param resourceUrl The URL of the resource. + * @param urlTypes The resource URL types with which this resource is referenced. + */ + public RdfResourceEntry(String resourceUrl, Set urlTypes) { + this.resourceUrl = resourceUrl; + this.urlTypes = new HashSet<>(urlTypes); + } + /** * Constructor. * diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/VideoResourceMetadata.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/VideoResourceMetadata.java index 3cf8fbfdf..395fa548b 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/VideoResourceMetadata.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/VideoResourceMetadata.java @@ -1,5 +1,7 @@ package eu.europeana.metis.mediaprocessing.model; +import eu.europeana.metis.schema.jibx.EdmType; + /** * Resource metadata for video resources. 
*/ @@ -78,6 +80,7 @@ protected void updateResource(WebResource resource) { resource.setHeight(height); resource.setCodecName(codecName); resource.setFrameRate(frameRate); + resource.setEdmType(EdmType.VIDEO); } public Double getDuration() { diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/WebResource.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/WebResource.java index 46df71f5c..f7be24242 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/WebResource.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/model/WebResource.java @@ -6,6 +6,7 @@ import eu.europeana.metis.schema.jibx.ColorSpaceType; import eu.europeana.metis.schema.jibx.DoubleType; import eu.europeana.metis.schema.jibx.Duration; +import eu.europeana.metis.schema.jibx.EdmType; import eu.europeana.metis.schema.jibx.HasColorSpace; import eu.europeana.metis.schema.jibx.HasMimeType; import eu.europeana.metis.schema.jibx.Height; @@ -19,6 +20,7 @@ import eu.europeana.metis.schema.jibx.SpatialResolution; import eu.europeana.metis.schema.jibx.StringType; import eu.europeana.metis.schema.jibx.Type1; +import eu.europeana.metis.schema.jibx.Type2; import eu.europeana.metis.schema.jibx.WebResourceType; import eu.europeana.metis.schema.jibx.Width; import eu.europeana.metis.schema.model.Orientation; @@ -146,6 +148,22 @@ void setResolution(Integer resolution) { resource.setSpatialResolution(uintVal(SpatialResolution::new, resolution)); } + void setEdmType(EdmType edmType) { + if (edmType == null) { + resource.setType1(null); + } else if (isValidEdmType(edmType)) { + Type2 type2 = new Type2(); + type2.setType(edmType); + resource.setType1(type2); + } else { + resource.setType1(null); + } + } + + boolean isValidEdmType(EdmType edmType) { + return edmType.equals(EdmType.IMAGE) || edmType.equals(EdmType.VIDEO); + } + private static T intVal(Supplier constructor, Integer value) { if (value 
== null) { return null; diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetector.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetector.java new file mode 100644 index 000000000..a765f9932 --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetector.java @@ -0,0 +1,60 @@ +package eu.europeana.metis.mediaprocessing.wrappers; + +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromJson; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo; + +import eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedModel; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serial; +import org.apache.tika.detect.Detector; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The type OEmbed json file detector. + */ +public class OEmbedJsonFileDetector implements Detector { + + private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedJsonFileDetector.class); + private static final MediaType OEMBED_JSON = MediaType.application("json+oembed"); + @Serial + private static final long serialVersionUID = -3009429767832982324L; + + /** + * Detects the content type of the given input document. Returns + * application/octet-stream if the type of the document + * can not be detected. + *

+ * If the document input stream is not available, then the first argument may be null. Otherwise the detector may + * read bytes from the start of the stream to help in type detection. The given stream is guaranteed to support the + * {@link InputStream#markSupported() mark feature} and the detector is expected to {@link InputStream#mark(int) mark} the + * stream before reading any bytes from it, and to {@link InputStream#reset() reset} the stream before returning. The stream + * must not be closed by the detector. + *

+ * The given input metadata is only read, not modified, by the detector. + * + * @param input document input stream, or null + * @param metadata input metadata for the document + * @return detected media type, or application/octet-stream + * @throws IOException exception if the document input stream could not be read + */ + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException { + try { + input.mark(Integer.MAX_VALUE); + OEmbedModel embedModel = getOEmbedModelFromJson(input.readAllBytes()); + if (isValidOEmbedPhotoOrVideo(embedModel)) { + return OEMBED_JSON; + } + } catch (IOException e) { + LOGGER.warn("unable to read json returning octet stream: ", e); + return MediaType.OCTET_STREAM; + } finally { + input.reset(); + } + return MediaType.OCTET_STREAM; + } +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetector.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetector.java new file mode 100644 index 000000000..ee59fa57a --- /dev/null +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetector.java @@ -0,0 +1,60 @@ +package eu.europeana.metis.mediaprocessing.wrappers; + +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromXml; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo; + +import eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedModel; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serial; +import org.apache.tika.detect.Detector; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The type OEmbed xml file detector. 
+ */ +public class OEmbedXmlFileDetector implements Detector { + + private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedXmlFileDetector.class); + private static final MediaType OEMBED_XML = MediaType.application("xml+oembed"); + @Serial + private static final long serialVersionUID = -4502227849462535039L; + + /** + * Detects the content type of the given input document. Returns + * application/octet-stream if the type of the document + * can not be detected. + *

+ * If the document input stream is not available, then the first argument may be null. Otherwise the detector may + * read bytes from the start of the stream to help in type detection. The given stream is guaranteed to support the + * {@link InputStream#markSupported() mark feature} and the detector is expected to {@link InputStream#mark(int) mark} the + * stream before reading any bytes from it, and to {@link InputStream#reset() reset} the stream before returning. The stream + * must not be closed by the detector. + *

+ * The given input metadata is only read, not modified, by the detector. + * + * @param input document input stream, or null + * @param metadata input metadata for the document + * @return detected media type, or application/octet-stream + * @throws IOException exception if the document input stream could not be read + */ + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException { + try { + input.mark(Integer.MAX_VALUE); + OEmbedModel embedModel = getOEmbedModelFromXml(input.readAllBytes()); + if (isValidOEmbedPhotoOrVideo(embedModel)) { + return OEMBED_XML; + } + } catch (IOException e) { + LOGGER.warn("unable to read xml returning octet stream: ", e); + return MediaType.OCTET_STREAM; + } finally { + input.reset(); + } + return MediaType.OCTET_STREAM; + } +} diff --git a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/TikaWrapper.java b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/TikaWrapper.java index a297613a1..d69704648 100644 --- a/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/TikaWrapper.java +++ b/metis-media-service/src/main/java/eu/europeana/metis/mediaprocessing/wrappers/TikaWrapper.java @@ -2,7 +2,10 @@ import java.io.IOException; import java.io.InputStream; +import java.util.List; import org.apache.tika.Tika; +import org.apache.tika.detect.CompositeDetector; +import org.apache.tika.detect.DefaultDetector; import org.apache.tika.metadata.Metadata; /** @@ -16,11 +19,18 @@ public class TikaWrapper { * It creates a new instance of Tika */ public TikaWrapper() { - this.tika = new Tika(); + OEmbedJsonFileDetector embedJsonFileDetector = new OEmbedJsonFileDetector(); + OEmbedXmlFileDetector embedXmlFileDetector = new OEmbedXmlFileDetector(); + CompositeDetector compositeDetector = new CompositeDetector( + List.of(embedJsonFileDetector, embedXmlFileDetector, new DefaultDetector()) + ); + + this.tika = new Tika(compositeDetector); 
} /** * It uses tika's own detect method + * * @param inputStream The input stream to detect from * @param metadata The metadata associated with the input stream * @return The mime type detected from the input stream @@ -30,7 +40,7 @@ public String detect(InputStream inputStream, Metadata metadata) throws IOExcept String detectedMimeType = tika.detect(inputStream, metadata); - if(detectedMimeType.equals("application/vnd.ms-pki.stl")){ + if (detectedMimeType.equals("application/vnd.ms-pki.stl")) { return "model/x.stl-binary"; } diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImplTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImplTest.java index 749d81004..16063909a 100644 --- a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImplTest.java +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/RdfDeserializerImplTest.java @@ -4,8 +4,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import eu.europeana.metis.mediaprocessing.exception.RdfDeserializationException; +import eu.europeana.metis.mediaprocessing.model.RdfResourceEntry; import eu.europeana.metis.mediaprocessing.model.UrlType; +import java.io.InputStream; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; import javax.xml.parsers.DocumentBuilderFactory; @@ -19,9 +22,31 @@ class RdfDeserializerImplTest { private static final String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; private static final String ORE_NAMESPACE = "http://www.openarchives.org/ore/terms/"; private static final String EDM_NAMESPACE = "http://www.europeana.eu/schemas/edm/"; + private static final String SVCS_NAMESPACE = "http://rdfs.org/sioc/services#"; + private static final String DCTERMS_NAMESPACE = "http://purl.org/dc/terms/"; + + private static String addEdmOEmbedResourceType(Document document, Element 
aggregation, String typeName, String resourceValue) { + final Element object = document.createElementNS(EDM_NAMESPACE, typeName); + object.setAttributeNS(RDF_NAMESPACE, "resource", resourceValue); + aggregation.appendChild(object); + final Element webResource = document.createElementNS(EDM_NAMESPACE, "WebResource"); + webResource.setAttributeNS(RDF_NAMESPACE, "about", resourceValue); + final Element hasService = document.createElementNS(SVCS_NAMESPACE, "has_service"); + final String oEmbedResourceService = "http://resource/services/oembed/"; + hasService.setAttributeNS(RDF_NAMESPACE, "resource", oEmbedResourceService); + webResource.appendChild(hasService); + object.setAttributeNS(RDF_NAMESPACE, "resource", resourceValue); + final Element service = document.createElementNS(SVCS_NAMESPACE, "Service"); + service.setAttributeNS(RDF_NAMESPACE, "about", oEmbedResourceService); + final Element conformsTo = document.createElementNS(DCTERMS_NAMESPACE, "conformsTo"); + service.appendChild(conformsTo); + object.appendChild(webResource); + object.appendChild(service); + return resourceValue; + } private static String addEdmResourceType(Document document, Element aggregation, String typeName, - String resourceValue) { + String resourceValue) { final Element object = document.createElementNS(EDM_NAMESPACE, typeName); object.setAttributeNS(RDF_NAMESPACE, "resource", resourceValue); aggregation.appendChild(object); @@ -33,27 +58,28 @@ private static String addEdmObject(Document document, Element aggregation, Strin } private static String addEdmHasView(Document document, Element aggregation, - String resourceValue) { + String resourceValue) { return addEdmResourceType(document, aggregation, "hasView", resourceValue); } private static String addEdmIsShownBy(Document document, Element aggregation, - String resourceValue) { + String resourceValue) { return addEdmResourceType(document, aggregation, "isShownBy", resourceValue); } private static String addEdmIsShownAt(Document document, 
Element aggregation, - String resourceValue) { + String resourceValue) { return addEdmResourceType(document, aggregation, "isShownAt", resourceValue); } @Test void testGetResourceUrlsWithDifferentResources() - throws RdfDeserializationException, ParserConfigurationException { + throws RdfDeserializationException, ParserConfigurationException { // Create document with root rdf - final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder() - .newDocument(); + final Document document = DocumentBuilderFactory.newInstance() + .newDocumentBuilder() + .newDocument(); final Element rdf = document.createElementNS(RDF_NAMESPACE, "RDF"); document.appendChild(rdf); @@ -73,7 +99,7 @@ void testGetResourceUrlsWithDifferentResources() // Test method for all url types final Map> resultAllTypes = new RdfDeserializerImpl() - .getResourceEntries(document, Set.of(UrlType.values())); + .getResourceEntries(document, Set.of(UrlType.values())); assertEquals(6, resultAllTypes.size()); assertEquals(Collections.singleton(UrlType.OBJECT), resultAllTypes.get(object)); assertEquals(Collections.singleton(UrlType.HAS_VIEW), resultAllTypes.get(hasView1)); @@ -84,7 +110,7 @@ void testGetResourceUrlsWithDifferentResources() // Test method for selection of url types final Map> resultSelectedTypes = new RdfDeserializerImpl() - .getResourceEntries(document, Set.of(UrlType.IS_SHOWN_AT, UrlType.HAS_VIEW)); + .getResourceEntries(document, Set.of(UrlType.IS_SHOWN_AT, UrlType.HAS_VIEW)); assertEquals(3, resultSelectedTypes.size()); assertEquals(Collections.singleton(UrlType.HAS_VIEW), resultSelectedTypes.get(hasView1)); assertEquals(Collections.singleton(UrlType.HAS_VIEW), resultSelectedTypes.get(hasView2)); @@ -92,17 +118,18 @@ void testGetResourceUrlsWithDifferentResources() // Test method for no url types assertTrue( - new RdfDeserializerImpl().getResourceEntries(document, Collections.emptySet()) - .isEmpty()); + new RdfDeserializerImpl().getResourceEntries(document, 
Collections.emptySet()) + .isEmpty()); } @Test void testGetResourceUrlsWithSameResources() - throws RdfDeserializationException, ParserConfigurationException { + throws RdfDeserializationException, ParserConfigurationException { // Create document with root rdf - final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder() - .newDocument(); + final Document document = DocumentBuilderFactory.newInstance() + .newDocumentBuilder() + .newDocument(); final Element rdf = document.createElementNS(RDF_NAMESPACE, "RDF"); document.appendChild(rdf); final String commonResource = "common resource"; @@ -121,26 +148,66 @@ void testGetResourceUrlsWithSameResources() // Test method for all url types final Map> resultAllTypes = new RdfDeserializerImpl() - .getResourceEntries(document, Set.of(UrlType.values())); + .getResourceEntries(document, Set.of(UrlType.values())); assertEquals(1, resultAllTypes.size()); assertEquals(Set.of(UrlType.values()), resultAllTypes.get(commonResource)); // Test method for selected url types final Set selectedTypes = Set.of(UrlType.IS_SHOWN_BY, UrlType.OBJECT); final Map> resultSelectedTypes = new RdfDeserializerImpl() - .getResourceEntries(document, selectedTypes); + .getResourceEntries(document, selectedTypes); assertEquals(1, resultSelectedTypes.size()); assertEquals(selectedTypes, resultSelectedTypes.get(commonResource)); } @Test void testGetResourceUrlsWithoutData() - throws RdfDeserializationException, ParserConfigurationException { + throws RdfDeserializationException, ParserConfigurationException { final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder() - .newDocument(); + .newDocument(); final Element rdf = document.createElementNS(RDF_NAMESPACE, "RDF"); document.appendChild(rdf); assertTrue(new RdfDeserializerImpl().getResourceEntries(document, Collections.emptySet()) - .isEmpty()); + .isEmpty()); + } + + @Test + void testGetResourceUrlsFromOEmbedCondition() + throws 
RdfDeserializationException, ParserConfigurationException { + + // given Create document with root rdf + final Document document = DocumentBuilderFactory.newInstance() + .newDocumentBuilder() + .newDocument(); + final Element rdf = document.createElementNS(RDF_NAMESPACE, "RDF"); + document.appendChild(rdf); + final Element aggregation1 = document.createElementNS(ORE_NAMESPACE, "Aggregation"); + rdf.appendChild(aggregation1); + final String hasView = addEdmOEmbedResourceType(document, aggregation1, "hasView", "has view resource"); + final String isShownBy = addEdmOEmbedResourceType(document, aggregation1, "isShownBy", "is shown by resource"); + + // when test object extraction + final Map> resultAllTypes = new RdfDeserializerImpl() + .getResourceEntries(document, Set.of(UrlType.values())); + + // then check the oEmbedResources where succesfully identified. + assertEquals(2, resultAllTypes.size()); + assertEquals(Collections.singleton(UrlType.HAS_VIEW), resultAllTypes.get(hasView)); + assertEquals(Collections.singleton(UrlType.IS_SHOWN_BY), resultAllTypes.get(isShownBy)); + } + + @Test + void testGetOEmbeddableObjectsFromSample() throws RdfDeserializationException { + // given + final InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/rdf_with_oembed_sample.xml"); + + // when + final List rdfResourceEntry = new RdfDeserializerImpl().getRemainingResourcesForMediaExtraction(inputStream); + + // then + assertEquals(2, rdfResourceEntry.size()); + assertTrue( rdfResourceEntry.stream().anyMatch( r-> r.getResourceUrl().equals("https://vimeo.com/api/oembed.json?url=https%3A%2F%2Fcdn.pixabay.com%2Fvideo%2F2023%2F10%2F22%2F186070-876973719_small.mp4")) + && rdfResourceEntry.stream().anyMatch( r-> r.getResourceUrl().equals("http://www.flickr.com/services/oembed/?url=https%3A%2F%2Fwww.flickr.com%2Fphotos%2Fbees%2F2341623661%2F&format=json"))); + } } diff --git 
a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImplTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImplTest.java index 81006478a..7d8ed8f91 100644 --- a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImplTest.java +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/MediaExtractorImplTest.java @@ -7,6 +7,8 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; @@ -25,8 +27,11 @@ import eu.europeana.metis.mediaprocessing.http.ResourceDownloadClient; import eu.europeana.metis.mediaprocessing.model.RdfResourceEntry; import eu.europeana.metis.mediaprocessing.model.Resource; +import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResult; import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResultImpl; +import eu.europeana.metis.mediaprocessing.model.ResourceImpl; import eu.europeana.metis.mediaprocessing.model.UrlType; +import eu.europeana.metis.mediaprocessing.model.VideoResourceMetadata; import eu.europeana.metis.mediaprocessing.wrappers.TikaWrapper; import eu.europeana.metis.schema.model.MediaType; import java.io.IOException; @@ -38,8 +43,10 @@ import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; +import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.tika.metadata.Metadata; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -56,9 +63,14 @@ class 
MediaExtractorImplTest { private static AudioVideoProcessor audioVideoProcessor; private static TextProcessor textProcessor; private static Media3dProcessor media3dProcessor; - + private static OEmbedProcessor oEmbedProcessor; private static MediaExtractorImpl mediaExtractor; + private void testGetMode(ProcessingMode expected, Set urlTypes) { + final RdfResourceEntry entry = new RdfResourceEntry("url string", new ArrayList<>(urlTypes)); + assertEquals(expected, mediaExtractor.getMode(entry)); + } + @BeforeAll static void prepare() { resourceDownloadClient = mock(ResourceDownloadClient.class); @@ -69,14 +81,15 @@ static void prepare() { audioVideoProcessor = mock(AudioVideoProcessor.class); textProcessor = mock(TextProcessor.class); media3dProcessor = mock(Media3dProcessor.class); + oEmbedProcessor = mock(OEmbedProcessor.class); mediaExtractor = spy(new MediaExtractorImpl(resourceDownloadClient, mimeTypeDetectHttpClient, - tika, imageProcessor, audioVideoProcessor, textProcessor, media3dProcessor)); + tika, List.of(imageProcessor, audioVideoProcessor, textProcessor, media3dProcessor, oEmbedProcessor))); } @BeforeEach void resetMocks() { reset(resourceDownloadClient, mimeTypeDetectHttpClient, commandExecutor, tika, imageProcessor, - audioVideoProcessor, textProcessor, mediaExtractor); + audioVideoProcessor, textProcessor, mediaExtractor, oEmbedProcessor); } @Test @@ -123,9 +136,9 @@ void testDetectAndVerifyMimeType() throws IOException, MediaExtractionException, // Check what happens if we are not supposed to process assertThrows(IllegalStateException.class, () -> mediaExtractor.detectAndVerifyMimeType(resource, ProcessingMode.NONE)); } - - @Test - void testVerifyAndCorrectContentAvailability () throws MediaExtractionException, IOException { + + @Test + void testVerifyAndCorrectContentAvailability() throws MediaExtractionException, IOException { // Set up the resource final String location = "resource url"; @@ -166,7 +179,7 @@ void 
testVerifyAndCorrectContentAvailability () throws MediaExtractionException, assertThrows(MediaExtractionException.class, () -> mediaExtractor.verifyAndCorrectContentAvailability(resource, ProcessingMode.FULL, detectedMimeTypeWithContent)); - + // Test case where there is no content, but there should be and a correction is attempted. // Step 1: set the mocking to use a boolean that changes when content is set. final AtomicBoolean hasContent = new AtomicBoolean(false); @@ -178,7 +191,7 @@ void testVerifyAndCorrectContentAvailability () throws MediaExtractionException, doReturn(detectedMimeTypeNoContent).when(resource).getProvidedMimeType(); doReturn(resourceWithContent).when(resourceDownloadClient).downloadWithContent(any()); doReturn(true).when(resourceWithContent).hasContent(); - + // Step 2: make the call and check that the download has occurred. verify(resourceDownloadClient, never()).downloadWithContent(any()); mediaExtractor.verifyAndCorrectContentAvailability(resource, ProcessingMode.FULL, detectedMimeTypeWithContent); @@ -189,7 +202,7 @@ void testVerifyAndCorrectContentAvailability () throws MediaExtractionException, final RdfResourceEntry entry = entryCaptor.getValue(); assertEquals(location, entry.getResourceUrl()); verify(resource, times(1)).markAsWithContent(content); - + // Step 3: check what happens when the download does not include content either. 
hasContent.set(false); doReturn(false).when(resourceWithContent).hasContent(); @@ -200,12 +213,13 @@ void testVerifyAndCorrectContentAvailability () throws MediaExtractionException, @Test void testChooseMediaProcessor() { - assertSame(imageProcessor, mediaExtractor.chooseMediaProcessor(MediaType.IMAGE)); - assertSame(audioVideoProcessor, mediaExtractor.chooseMediaProcessor(MediaType.AUDIO)); - assertSame(audioVideoProcessor, mediaExtractor.chooseMediaProcessor(MediaType.VIDEO)); - assertSame(textProcessor, mediaExtractor.chooseMediaProcessor(MediaType.TEXT)); - assertSame(media3dProcessor, mediaExtractor.chooseMediaProcessor(MediaType.THREE_D)); - assertNull(mediaExtractor.chooseMediaProcessor(MediaType.OTHER)); + assertSame(imageProcessor, mediaExtractor.chooseMediaProcessor(MediaType.IMAGE,"image/subtype")); + assertSame(audioVideoProcessor, mediaExtractor.chooseMediaProcessor(MediaType.AUDIO,"audio/subtype")); + assertSame(audioVideoProcessor, mediaExtractor.chooseMediaProcessor(MediaType.VIDEO,"video/subtype")); + assertSame(textProcessor, mediaExtractor.chooseMediaProcessor(MediaType.TEXT, "text/subtype")); + assertSame(media3dProcessor, mediaExtractor.chooseMediaProcessor(MediaType.THREE_D,"model/subtype")); + assertSame(oEmbedProcessor, mediaExtractor.chooseMediaProcessor(MediaType.OTHER,"application/json+oembed")); + assertSame(oEmbedProcessor, mediaExtractor.chooseMediaProcessor(MediaType.OTHER,"application/xml+oembed")); } @Test @@ -221,7 +235,7 @@ void testProcessResource() throws MediaExtractionException, IOException { // Set processor. 
doReturn(audioVideoProcessor) - .when(mediaExtractor).chooseMediaProcessor(MediaType.getMediaType(detectedMimeType)); + .when(mediaExtractor).chooseMediaProcessor(MediaType.getMediaType(detectedMimeType), detectedMimeType); final ResourceExtractionResultImpl result1 = new ResourceExtractionResultImpl(null, null); doReturn(result1).when(audioVideoProcessor).extractMetadata(resource, detectedMimeType, hasMainThumbnail); final ResourceExtractionResultImpl result2 = new ResourceExtractionResultImpl(null, null); @@ -236,13 +250,13 @@ void testProcessResource() throws MediaExtractionException, IOException { verify(mediaExtractor, times(1)).detectAndVerifyMimeType(resource, ProcessingMode.REDUCED); verify(mediaExtractor, times(1)).verifyAndCorrectContentAvailability(resource, ProcessingMode.REDUCED, detectedMimeType); - + // Check what happens if we are not supposed to process assertThrows(IllegalStateException.class, - () -> mediaExtractor.performProcessing(resource, ProcessingMode.NONE, hasMainThumbnail)); + () -> mediaExtractor.performProcessing(resource, ProcessingMode.NONE, hasMainThumbnail)); // Check what happens if there is no processor - doReturn(null).when(mediaExtractor).chooseMediaProcessor(MediaType.getMediaType(detectedMimeType)); + doReturn(null).when(mediaExtractor).chooseMediaProcessor(MediaType.getMediaType(detectedMimeType), detectedMimeType); assertNull(mediaExtractor.performProcessing(resource, ProcessingMode.FULL, hasMainThumbnail)); assertNull(mediaExtractor.performProcessing(resource, ProcessingMode.REDUCED, hasMainThumbnail)); } @@ -257,10 +271,10 @@ void testPerformMediaExtraction() throws IOException, MediaExtractionException { doReturn(ProcessingMode.FULL).when(mediaExtractor).getMode(entry1); doReturn(resource1).when(resourceDownloadClient).downloadBasedOnMimeType(entry1); final ResourceExtractionResultImpl result1 = new ResourceExtractionResultImpl(null, null); - doReturn(result1).when(mediaExtractor).performProcessing(resource1, 
ProcessingMode.FULL, hasMainThumbnail ); + doReturn(result1).when(mediaExtractor).performProcessing(resource1, ProcessingMode.FULL, hasMainThumbnail); // Make the call and verify that the resource is closed. - assertSame(result1, mediaExtractor.performMediaExtraction(entry1, hasMainThumbnail )); + assertSame(result1, mediaExtractor.performMediaExtraction(entry1, hasMainThumbnail)); verify(resource1).close(); // mock for reduced processing @@ -269,10 +283,10 @@ void testPerformMediaExtraction() throws IOException, MediaExtractionException { doReturn(ProcessingMode.REDUCED).when(mediaExtractor).getMode(entry2); doReturn(resource2).when(resourceDownloadClient).downloadWithoutContent(entry2); final ResourceExtractionResultImpl result2 = new ResourceExtractionResultImpl(null, null); - doReturn(result2).when(mediaExtractor).performProcessing(resource2, ProcessingMode.REDUCED, hasMainThumbnail ); + doReturn(result2).when(mediaExtractor).performProcessing(resource2, ProcessingMode.REDUCED, hasMainThumbnail); // Make the call and verify that the resource is closed. - assertSame(result2, mediaExtractor.performMediaExtraction(entry2, hasMainThumbnail )); + assertSame(result2, mediaExtractor.performMediaExtraction(entry2, hasMainThumbnail)); verify(resource2).close(); // Check exception from downloading. 
@@ -280,14 +294,14 @@ void testPerformMediaExtraction() throws IOException, MediaExtractionException { doReturn(ProcessingMode.FULL).when(mediaExtractor).getMode(entry3); doThrow(IOException.class).when(resourceDownloadClient).downloadBasedOnMimeType(entry3); assertThrows(MediaExtractionException.class, - () -> mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail )); + () -> mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail)); doThrow(RuntimeException.class).when(resourceDownloadClient).downloadBasedOnMimeType(entry3); assertThrows(MediaExtractionException.class, - () -> mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail )); + () -> mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail)); // Verify sanity check doReturn(ProcessingMode.NONE).when(mediaExtractor).getMode(entry3); - assertNull(mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail )); + assertNull(mediaExtractor.performMediaExtraction(entry3, hasMainThumbnail)); } @Test @@ -302,12 +316,15 @@ void testShouldDownloadForFullProcessing() { doReturn(true).when(textProcessor).downloadResourceForFullProcessing(); doReturn(false).when(audioVideoProcessor).downloadResourceForFullProcessing(); doReturn(false).when(media3dProcessor).downloadResourceForFullProcessing(); + doReturn(true).when(oEmbedProcessor).downloadResourceForFullProcessing(); assertTrue(mediaExtractor.shouldDownloadForFullProcessing("image/unknown_type")); assertTrue(mediaExtractor.shouldDownloadForFullProcessing("text/unknown_type")); assertFalse(mediaExtractor.shouldDownloadForFullProcessing("audio/unknown_type")); assertFalse(mediaExtractor.shouldDownloadForFullProcessing("video/unknown_type")); assertFalse(mediaExtractor.shouldDownloadForFullProcessing("model/unknown_type")); assertFalse(mediaExtractor.shouldDownloadForFullProcessing("unknown_type")); + assertTrue(mediaExtractor.shouldDownloadForFullProcessing("application/xml+oembed")); + 
assertTrue(mediaExtractor.shouldDownloadForFullProcessing("application/json+oembed")); } @Test @@ -330,8 +347,49 @@ void testGetMode() { testGetMode(ProcessingMode.FULL, EnumSet.allOf(UrlType.class)); } - private void testGetMode(ProcessingMode expected, Set urlTypes) { - final RdfResourceEntry entry = new RdfResourceEntry("url string", new ArrayList<>(urlTypes)); - assertEquals(expected, mediaExtractor.getMode(entry)); + @Test + void getOEmbedJson() throws MediaExtractionException, IOException { + final String resourceUrl = "https://vimeo.com/api/oembed.json?url=https%3A%2F%2Fvimeo.com%2F24416915"; + + final String detectedMimeType = "application/json+oembed"; + final RdfResourceEntry rdfResourceEntry = new RdfResourceEntry(resourceUrl, Collections.singletonList(UrlType.IS_SHOWN_BY)); + final Resource resource = spy( + new ResourceImpl(rdfResourceEntry, null, null, URI.create(resourceUrl))); + doReturn(true) + .when(resource).hasContent(); + doReturn(detectedMimeType) + .when(tika).detect(any(InputStream.class), any(Metadata.class)); + doReturn(Paths.get(getClass().getClassLoader().getResource("__files/oembed.json").getPath())) + .when(resource).getContentPath(); + doReturn(resource).when(resourceDownloadClient).downloadBasedOnMimeType(rdfResourceEntry); + ResourceExtractionResult extractionResult = new ResourceExtractionResultImpl( + new VideoResourceMetadata(detectedMimeType, resourceUrl, 0L)); + doReturn(extractionResult).when(oEmbedProcessor).extractMetadata(any(Resource.class), anyString(), anyBoolean()); + + ResourceExtractionResult resourceExtractionResult = mediaExtractor.performMediaExtraction(rdfResourceEntry, false); + assertEquals(resourceUrl, resourceExtractionResult.getMetadata().getResourceUrl()); + } + + @Test + void getOEmbedXml() throws MediaExtractionException, IOException { + final String resourceUrl = "https://vimeo.com/api/oembed.xml?url=https%3A%2F%2Fvimeo.com%2F24416915"; + + final String detectedMimeType = "application/xml+oembed"; + final 
RdfResourceEntry rdfResourceEntry = new RdfResourceEntry(resourceUrl, Collections.singletonList(UrlType.IS_SHOWN_BY)); + final ResourceImpl resource = spy( + new ResourceImpl(rdfResourceEntry, detectedMimeType, null, URI.create(resourceUrl))); + doReturn(true) + .when(resource).hasContent(); + doReturn(detectedMimeType) + .when(tika).detect(any(InputStream.class), any(Metadata.class)); + doReturn(Paths.get(getClass().getClassLoader().getResource("__files/oembed.xml").getPath())) + .when(resource).getContentPath(); + doReturn(resource).when(resourceDownloadClient).downloadBasedOnMimeType(rdfResourceEntry); + ResourceExtractionResult extractionResult = new ResourceExtractionResultImpl( + new VideoResourceMetadata(detectedMimeType, resourceUrl, 0L)); + doReturn(extractionResult).when(oEmbedProcessor).extractMetadata(any(Resource.class), anyString(), anyBoolean()); + + ResourceExtractionResult resourceExtractionResult = mediaExtractor.performMediaExtraction(rdfResourceEntry, false); + assertEquals(resourceUrl, resourceExtractionResult.getMetadata().getResourceUrl()); } } diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessorTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessorTest.java new file mode 100644 index 000000000..0c57dd02d --- /dev/null +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/OEmbedProcessorTest.java @@ -0,0 +1,106 @@ +package eu.europeana.metis.mediaprocessing.extraction; + +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.get; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.wireMockConfig; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static 
org.junit.jupiter.api.Assertions.assertTrue; + +import com.github.tomakehurst.wiremock.junit5.WireMockExtension; +import eu.europeana.metis.mediaprocessing.MediaProcessorFactory; +import eu.europeana.metis.mediaprocessing.exception.MediaExtractionException; +import eu.europeana.metis.mediaprocessing.http.ResourceDownloadClient; +import eu.europeana.metis.mediaprocessing.model.RdfResourceEntry; +import eu.europeana.metis.mediaprocessing.model.Resource; +import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResult; +import eu.europeana.metis.mediaprocessing.model.UrlType; +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +class OEmbedProcessorTest { + + @RegisterExtension + static WireMockExtension wireMockExtension = WireMockExtension.newInstance() + .options(wireMockConfig() + .dynamicPort() + .dynamicHttpsPort()) + .build(); + private OEmbedProcessor processor; + private ResourceDownloadClient resourceDownloadClient; + + private OEmbedResourceTest getOEmbedResourceTest(String filename, String detectedMimeType) throws IOException { + try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream(filename)) { + byte[] audioBytes = inputStream.readAllBytes(); + wireMockExtension.stubFor(get("/api/resource?url=https://vimeo.com/24416915").willReturn(aResponse() + .withStatus(200) + .withBody(audioBytes) + .withHeader("Content-Disposition", "inline; filename=\"oembed.xml\""))); + } + final String resourceUrl = String.format("http://localhost:%d/api/resource?url=https://vimeo.com/24416915", + wireMockExtension.getPort()); + + final RdfResourceEntry rdfResourceEntry = new RdfResourceEntry(resourceUrl, Collections.singletonList(UrlType.IS_SHOWN_BY)); + final Resource resource = resourceDownloadClient.downloadBasedOnMimeType(rdfResourceEntry); + return new 
OEmbedResourceTest(resourceUrl, detectedMimeType, resource); + } + + private record OEmbedResourceTest(String resourceUrl, String detectedMimeType, Resource resource) { + + } + + @BeforeEach + void setUp() { + processor = new OEmbedProcessor(); + resourceDownloadClient = new ResourceDownloadClient(MediaProcessorFactory.DEFAULT_MAX_REDIRECT_COUNT, download -> true, + MediaProcessorFactory.DEFAULT_RESOURCE_CONNECT_TIMEOUT, + MediaProcessorFactory.DEFAULT_RESOURCE_RESPONSE_TIMEOUT, + MediaProcessorFactory.DEFAULT_RESOURCE_DOWNLOAD_TIMEOUT); + } + + @Test + void extractMetadata() throws MediaExtractionException, IOException { + // given + OEmbedResourceTest oembedResource = getOEmbedResourceTest("__files/oembed.xml", "application/xml+oembed"); + // when + ResourceExtractionResult resourceExtractionResult = processor.extractMetadata(oembedResource.resource(), + oembedResource.detectedMimeType(), true); + + // then + assertNotNull(resourceExtractionResult); + assertEquals(oembedResource.resourceUrl(), resourceExtractionResult.getMetadata().getResourceUrl()); + assertEquals(oembedResource.detectedMimeType(), resourceExtractionResult.getMetadata().getMimeType()); + } + + @Test + void copyMetadataWithOEmbed_expectNull() throws MediaExtractionException, IOException { + // given + OEmbedResourceTest oembedResource = getOEmbedResourceTest("__files/oembed.xml", "application/xml+oembed"); + // when + ResourceExtractionResult resourceExtractionResult = processor.copyMetadata(oembedResource.resource, + oembedResource.detectedMimeType); + // then + assertNull(resourceExtractionResult); + } + + @Test + void copyMetadataNotOEmbed_expectObject() throws MediaExtractionException, IOException { + // given + OEmbedResourceTest oembedResource = getOEmbedResourceTest("__files/not_oembed.xml", "application/xml"); + // when + ResourceExtractionResult resourceExtractionResult = processor.copyMetadata(oembedResource.resource, + oembedResource.detectedMimeType); + // then + 
assertNull(resourceExtractionResult); + } + + @Test + void downloadResourceForFullProcessing() { + assertTrue(processor.downloadResourceForFullProcessing()); + } +} diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModelTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModelTest.java new file mode 100644 index 000000000..c2ae9174e --- /dev/null +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/oembed/OEmbedModelTest.java @@ -0,0 +1,65 @@ +package eu.europeana.metis.mediaprocessing.extraction.oembed; + +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromJson; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromXml; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.hasValidHeightSizeThumbnail; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.hasValidHeightSizeUrl; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.hasValidWidthSizeThumbnail; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.hasValidWidthSizeUrl; +import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.io.InputStream; +import org.junit.jupiter.api.Test; + +class OEmbedModelTest { + + @Test + void getOEmbedModelFromJsonTest() throws IOException { + InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.json"); + + OEmbedModel oEmbedModel = getOEmbedModelFromJson(inputStream.readAllBytes()); + + 
assertNotNull(oEmbedModel); + assertTrue(isValidOEmbedPhotoOrVideo(oEmbedModel)); + } + + @Test + void getOEmbedModelFromXmlTest() throws IOException { + InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.xml"); + + OEmbedModel oEmbedModel = getOEmbedModelFromXml(inputStream.readAllBytes()); + + assertNotNull(oEmbedModel); + assertTrue(isValidOEmbedPhotoOrVideo(oEmbedModel)); + } + + @Test + void checkValidWidthAndHeightDimensions() throws IOException { + String url = "https://vimeo.com/api/oembed.json?url=https%3A%2F%2Fcdn.pixabay.com%2Fvideo%2F2023%2F10%2F22%2F186070-876973719_small.mp4&maxheight=300&maxwidth=500"; + InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.json"); + + OEmbedModel oEmbedModel = getOEmbedModelFromJson(inputStream.readAllBytes()); + + assertTrue(hasValidHeightSizeUrl(oEmbedModel, url)); + assertTrue(hasValidWidthSizeUrl(oEmbedModel, url)); + assertTrue(hasValidHeightSizeThumbnail(oEmbedModel, url)); + assertTrue(hasValidWidthSizeThumbnail(oEmbedModel, url)); + } + + @Test + void checkValidWidthAndHeightDimensions_InvalidUrl() throws IOException { + String url = "my url test"; + InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.json"); + + OEmbedModel oEmbedModel = getOEmbedModelFromJson(inputStream.readAllBytes()); + + assertFalse(hasValidHeightSizeUrl(oEmbedModel, url)); + assertFalse(hasValidWidthSizeUrl(oEmbedModel, url)); + assertFalse(hasValidHeightSizeThumbnail(oEmbedModel, url)); + assertFalse(hasValidWidthSizeThumbnail(oEmbedModel, url)); + } +} diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClientTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClientTest.java index bc76d4c7c..5dadf29b3 100644 --- 
a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClientTest.java +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/http/MimeTypeDetectHttpClientTest.java @@ -6,15 +6,13 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import com.github.tomakehurst.wiremock.junit5.WireMockExtension; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; - +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; class MimeTypeDetectHttpClientTest { @@ -107,7 +105,7 @@ void download_returnProvidedStlMimeType_expectSuccess() throws IOException, URIS String detectedMimeType = mimeTypeDetectHttpClient.download(new URI(url).toURL()); // then - assertEquals("model/stl", detectedMimeType); + assertEquals("model/x.stl-binary", detectedMimeType); } @Test @@ -146,4 +144,41 @@ void download_detectMimeTypeGlb_expectSuccess() throws IOException, URISyntaxExc assertEquals("model/gltf-binary", detectedMimeType); } + @Test + void download_detectMimeTypeOembedJson_expectSuccess() throws IOException, URISyntaxException { + // given + try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.json")) { + byte[] jsonBytes = inputStream.readAllBytes(); + wireMockExtension.stubFor(get("/api/oembed.json?url=https://vimeo.com/24416915") + .willReturn(aResponse() + .withStatus(200) + .withBody(jsonBytes) + .withHeader("Content-Disposition", "inline; filename=\"oembed.json\""))); + } + final String url = String.format("http://localhost:%d/api/oembed.json?url=https://vimeo.com/24416915", wireMockExtension.getPort()); + // when + String detectedMimeType = mimeTypeDetectHttpClient.download(new URI(url).toURL()); + + // then + assertEquals("application/json+oembed", 
detectedMimeType); + } + + @Test + void download_detectMimeTypeOembedXml_expectSuccess() throws IOException, URISyntaxException { + // given + try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("__files/oembed.xml")) { + byte[] xmlBytes = inputStream.readAllBytes(); + wireMockExtension.stubFor(get("/api/oembed.xml?url=https://vimeo.com/24416915") + .willReturn(aResponse() + .withStatus(200) + .withBody(xmlBytes) + .withHeader("Content-Disposition", "inline; filename=\"oembed.xml\""))); + } + final String url = String.format("http://localhost:%d/api/oembed.xml?url=https://vimeo.com/24416915", wireMockExtension.getPort()); + // when + String detectedMimeType = mimeTypeDetectHttpClient.download(new URI(url).toURL()); + + // then + assertEquals("application/xml+oembed", detectedMimeType); + } } diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/model/WebResourceTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/model/WebResourceTest.java index ee40bb32a..f1132b156 100644 --- a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/model/WebResourceTest.java +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/model/WebResourceTest.java @@ -6,6 +6,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import eu.europeana.metis.schema.jibx.ColorSpaceType; +import eu.europeana.metis.schema.jibx.EdmType; import eu.europeana.metis.schema.jibx.WebResourceType; import eu.europeana.metis.schema.model.Orientation; import java.util.Arrays; @@ -212,4 +213,30 @@ void testSetResolution() { webResource.setResolution(null); assertNull(resourceType.getSpatialResolution()); } + + @Test + void testSetEdmType() { + final WebResourceType resourceType = new WebResourceType(); + final WebResource webResource = new WebResource(resourceType); + webResource.setEdmType(EdmType.VIDEO); + assertNotNull(resourceType.getType1()); + assertEquals(EdmType.VIDEO, 
resourceType.getType1().getType()); + + webResource.setEdmType(EdmType.IMAGE); + assertNotNull(resourceType.getType1()); + assertEquals(EdmType.IMAGE, resourceType.getType1().getType()); + } + + @Test + void testSetEdmTypeWithError() { + final WebResourceType resourceType = new WebResourceType(); + final WebResource webResource = new WebResource(resourceType); + + webResource.setEdmType(EdmType.TEXT); + assertNull(resourceType.getType1()); + webResource.setEdmType(EdmType.SOUND); + assertNull(resourceType.getType1()); + webResource.setEdmType(EdmType._3_D); + assertNull(resourceType.getType1()); + } } diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetectorTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetectorTest.java new file mode 100644 index 000000000..92f59356e --- /dev/null +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedJsonFileDetectorTest.java @@ -0,0 +1,30 @@ +package eu.europeana.metis.mediaprocessing.wrappers; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.junit.jupiter.api.Test; + +class OEmbedJsonFileDetectorTest { + + @Test + void detect() throws IOException { + OEmbedJsonFileDetector detector = new OEmbedJsonFileDetector(); + + MediaType result = detector.detect(getClass().getClassLoader().getResourceAsStream("__files/oembed.json"), new Metadata()); + + assertEquals("application/json+oembed", result.getType() + "/" + result.getSubtype()); + } + + @Test + void no_detect() throws IOException { + OEmbedJsonFileDetector detector = new OEmbedJsonFileDetector(); + + MediaType result = detector.detect(getClass().getClassLoader().getResourceAsStream("__files/not_oembed.json"), + new Metadata()); + + assertEquals("application/octet-stream", result.getType() + "/" 
+ result.getSubtype()); + } +} diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetectorTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetectorTest.java new file mode 100644 index 000000000..7f3b5e234 --- /dev/null +++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/wrappers/OEmbedXmlFileDetectorTest.java @@ -0,0 +1,29 @@ +package eu.europeana.metis.mediaprocessing.wrappers; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.junit.jupiter.api.Test; + +class OEmbedXmlFileDetectorTest { + + @Test + void detect() throws IOException { + OEmbedXmlFileDetector detector = new OEmbedXmlFileDetector(); + + MediaType result = detector.detect(getClass().getClassLoader().getResourceAsStream("__files/oembed.xml"), new Metadata()); + + assertEquals("application/xml+oembed", result.getType() + "/" + result.getSubtype()); + } + + @Test + void no_detect() throws IOException { + OEmbedXmlFileDetector detector = new OEmbedXmlFileDetector(); + + MediaType result = detector.detect(getClass().getClassLoader().getResourceAsStream("__files/not_oembed.xml"), new Metadata()); + + assertEquals("application/octet-stream", result.getType() + "/" + result.getSubtype()); + } +} diff --git a/metis-media-service/src/test/resources/__files/not_oembed.json b/metis-media-service/src/test/resources/__files/not_oembed.json new file mode 100644 index 000000000..080288e0a --- /dev/null +++ b/metis-media-service/src/test/resources/__files/not_oembed.json @@ -0,0 +1,25 @@ +{ + "glossary": { + "title": "example glossary", + "GlossDiv": { + "title": "S", + "GlossList": { + "GlossEntry": { + "ID": "SGML", + "SortAs": "SGML", + "GlossTerm": "Standard Generalized Markup Language", + "Acronym": "SGML", + "Abbrev": "ISO 8879:1986", + "GlossDef": 
{ + "para": "A meta-markup language, used to create markup languages such as DocBook.", + "GlossSeeAlso": [ + "GML", + "XML" + ] + }, + "GlossSee": "markup" + } + } + } + } +} diff --git a/metis-media-service/src/test/resources/__files/not_oembed.xml b/metis-media-service/src/test/resources/__files/not_oembed.xml new file mode 100644 index 000000000..afeaf7be6 --- /dev/null +++ b/metis-media-service/src/test/resources/__files/not_oembed.xml @@ -0,0 +1,13 @@ + + + + Oxford + Oxfordshire + Wikipedia + 10000 + + + diff --git a/metis-media-service/src/test/resources/__files/oembed.json b/metis-media-service/src/test/resources/__files/oembed.json new file mode 100644 index 000000000..9e8e5fad7 --- /dev/null +++ b/metis-media-service/src/test/resources/__files/oembed.json @@ -0,0 +1,23 @@ +{ + "type": "video", + "version": "1.0", + "provider_name": "Vimeo", + "provider_url": "https://vimeo.com/", + "title": "Europeana promo", + "author_name": "Europeana", + "author_url": "https://vimeo.com/europeana", + "is_plus": "1", + "account_type": "plus", + "html": "", + "width": 480, + "height": 270, + "duration": 31, + "description": "", + "thumbnail_url": "https://i.vimeocdn.com/video/223856359-d86332b534f4edd01355ee14c50b32473f746e4f56454128df8a8ca8228fffb8-d_295x166", + "thumbnail_width": 295, + "thumbnail_height": 166, + "thumbnail_url_with_play_button": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F223856359-d86332b534f4edd01355ee14c50b32473f746e4f56454128df8a8ca8228fffb8-d_295x166&src1=http%3A%2F%2Ff.vimeocdn.com%2Fp%2Fimages%2Fcrawler_play.png", + "upload_date": "2011-05-30 09:03:39", + "video_id": 24416915, + "uri": "/videos/24416915" +} diff --git a/metis-media-service/src/test/resources/__files/oembed.xml b/metis-media-service/src/test/resources/__files/oembed.xml new file mode 100644 index 000000000..a24a6ab7f --- /dev/null +++ b/metis-media-service/src/test/resources/__files/oembed.xml @@ -0,0 +1,31 @@ + + + video + 1.0 + Vimeo + 
https://vimeo.com/ + Europeana promo + Europeana + https://vimeo.com/europeana + 1 + plus + <iframe src="https://player.vimeo.com/video/24416915?app_id=122963" width="480" height="270" + frameborder="0" allow="autoplay; fullscreen; picture-in-picture; clipboard-write" title="Europeana + promo"></iframe> + + 480 + 270 + 31 + + + https://i.vimeocdn.com/video/223856359-d86332b534f4edd01355ee14c50b32473f746e4f56454128df8a8ca8228fffb8-d_295x166 + + 295 + 166 + + https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F223856359-d86332b534f4edd01355ee14c50b32473f746e4f56454128df8a8ca8228fffb8-d_295x166&src1=http%3A%2F%2Ff.vimeocdn.com%2Fp%2Fimages%2Fcrawler_play.png + + 2011-05-30 09:03:39 + 24416915 + /videos/24416915 + diff --git a/metis-media-service/src/test/resources/__files/rdf_with_oembed_sample.xml b/metis-media-service/src/test/resources/__files/rdf_with_oembed_sample.xml new file mode 100644 index 000000000..7a40e8fc9 --- /dev/null +++ b/metis-media-service/src/test/resources/__files/rdf_with_oembed_sample.xml @@ -0,0 +1,164 @@ + + + + IMAGE + + + + + Vimeo video + Europeana + Europeana + + + + + 58.04861 + -2.343056 + 8.0 + Skara Brae + + + Neolithic + -3180 + -2500 + + + + + + + + + + + + + + + + + + Ort, an dem Überreste der Vergangenheit erhalten geblieben sind + Tietyllä paikalla kiinteästi sijaitsevien jäännösten muodostama kokonaisuus + Plats där fornlämning påträffats + Объект материальной культуры, несущий в себе определённый объём информации о прошлом + Local onde se concentram vestígios arquelógicos + Τοποθεσία στην οποία βρίσκονται ενδείξεις ανθρώπινης δραστηριότητας του παρελθόντος + Place (or group of physical sites) in which evidence of past activity is preserved + Mjesto na kojemu postoji veća količina sačuvanih izrađevina i tvorevina iz prošlosti + Luogo in cui si conservano tracce dell'attività umana del passato + Lieu ou groupe de sites physiques où sont préservées des preuves de l'activité préhistorique, + historique 
ou contemporaine + + Lugar donde se concentran vestigios arqueológicos + Místo nebo skupina míst, kde jsou zachovány důkazy a pozůstatky historické aktivity + Historiaurreko, historiako edo gaur egungo jardueraren froga materialak gordetzen dituen leku edo + gune fisikoen multzoa + + Lloc on es conserven vestigis arqueològics + Plaats waar men archeologische vondsten heeft gedaan + Archäologische Stätte + Археологическое место + Arkeologinen kohde + Sítio arqueológico + Археологически обект + Archeologinė vieta + Arheoloģiskais piemineklis + Arheološko nalazište + Site archéologique + Régészeti lelőhely + Archeologická lokalita + Arheološko najdišče + Suíomh seandálaíochta + Jaciment arqueològic + Arkeologisk lokal + Αρχαιολογική θέση + Archaeological site + Sito archeologico + Yacimiento arqueológico + Muistis + Arkeologia-aztarnategi + Archeologická lokalita + Stanowisko archeologiczne + Sit arheologic + Arkæologisk område + Archeologische vindplaats + + + + + + + + + + + + + + CMC_HA/2255 + eng + + true + + + + + + Dive into the heart of Spain's vibrant cultural landscape with this mesmerizing video. Explore + the electrifying atmosphere of La Tomatina in Buñol, the haunting beauty of Semana Santa processions in Seville, and the + thrilling bull runs of San Fermín in Pamplona. Revel in the passionate Flamenco performances, savor the diverse flavors of + Spanish cuisine, and discover the deep-rooted traditions that weave through Spain's history. 
This video showcases the + colorful, spirited, and deeply traditional festivals that define Spanish culture + + mov + http://3dicons.dcu.gr/object/HA/1255 + English + + CMC Associates + CMC + + + Settlement + Panorama Movie of link path 1-5, Skara Brae + Movie + 3D ICONS + + Neolithic + + false + + + VIDEO + + + + Europeana Foundation + Europeana Foundation + 307_local_31072024_1640 + Netherlands + nl + 10 + + + Europeana + + + + + + + + diff --git a/pom.xml b/pom.xml index 2d614d4fa..8fa466f82 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ 2.3.0 3.0.0 1.6.2 - 1.26 + 2.9.2 3.4.2 1.19.6 2.12.2