diff --git a/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java b/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java index 2662b21..898d01f 100644 --- a/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java +++ b/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java @@ -1,5 +1,6 @@ package eu.europeana.metis.schema.convert; +import eu.europeana.metis.schema.convert.model.RdfXmlElementMetadata; import eu.europeana.metis.schema.jibx.RDF; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -22,6 +23,7 @@ /** * Utility class for converting {@link RDF} to String and vice versa. + * @deprecated use {@link RdfSerializer} and/or {@link RdfDeserializer}. */ public class RdfConversionUtils { @@ -87,7 +89,7 @@ public String getQualifiedElementNameForClass(Class objectClass) { final RdfXmlElementMetadata rdfXmlElementMetadata = rdfXmlElementMetadataMap.get(objectClass.getCanonicalName()); Objects.requireNonNull(rdfXmlElementMetadata, String.format("Element metadata not found for class: %s", objectClass.getCanonicalName())); - return String.format("%s:%s", rdfXmlElementMetadata.getPrefix(), rdfXmlElementMetadata.getName()); + return String.format("%s:%s", rdfXmlElementMetadata.prefix(), rdfXmlElementMetadata.name()); } /** @@ -122,37 +124,6 @@ public String convertRdfToString(RDF rdf) throws SerializationException { } } - static class RdfXmlElementMetadata { - - final String canonicalClassName; - final String prefix; - final String namespace; - final String name; - - public RdfXmlElementMetadata(String canonicalClassName, String prefix, String namespace, String name) { - this.canonicalClassName = canonicalClassName; - this.prefix = prefix; - this.namespace = namespace; - this.name = name; - } - - public String getCanonicalClassName() { - return canonicalClassName; - } - - public String getPrefix() { - return prefix; - } - - public String getNamespace() { - return namespace; - 
} - - public String getName() { - return name; - } - } - /** * Convert a UTF-8 encoded XML to {@link RDF} * @@ -209,7 +180,7 @@ private void checkAndStoreMetadataInMap(final Map final String prefix = rdfBindingFactory.getPrefixes()[namespaceIndex]; final RdfXmlElementMetadata rdfXmlElementMetadata = new RdfXmlElementMetadata(canonicalName, prefix, elementNamespace, elementName); - rdfXmlElementMetadataMap.put(rdfXmlElementMetadata.getCanonicalClassName(), rdfXmlElementMetadata); + rdfXmlElementMetadataMap.put(rdfXmlElementMetadata.canonicalClassName(), rdfXmlElementMetadata); } } } diff --git a/src/main/java/eu/europeana/metis/schema/convert/RdfDeserializer.java b/src/main/java/eu/europeana/metis/schema/convert/RdfDeserializer.java new file mode 100644 index 0000000..3de9dec --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/RdfDeserializer.java @@ -0,0 +1,240 @@ +package eu.europeana.metis.schema.convert; + +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.EDM_HAS_VIEW; +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.EDM_IS_SHOWN_AT; +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.EDM_IS_SHOWN_BY; +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.EDM_OBJECT; +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.EDM_WEBRESOURCE; +import static eu.europeana.metis.schema.convert.model.RdfXpathConstants.SVCS_SERVICE; + +import eu.europeana.metis.schema.convert.model.DeserializationOperation; +import eu.europeana.metis.schema.convert.model.RdfDeserializationException; +import eu.europeana.metis.schema.convert.model.RdfResourceEntry; +import eu.europeana.metis.schema.convert.model.ResourceInfo; +import eu.europeana.metis.schema.convert.model.UrlType; +import eu.europeana.metis.schema.convert.model.XPathExpressionWrapper; +import eu.europeana.metis.schema.jibx.RDF; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import 
java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import org.jibx.runtime.BindingDirectory; +import org.jibx.runtime.IBindingFactory; +import org.jibx.runtime.IUnmarshallingContext; +import org.jibx.runtime.JiBXException; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class RdfDeserializer { + + private static final String UTF8 = StandardCharsets.UTF_8.name(); + private static final String OEMBED_NAMESPACE = "https://oembed.com/"; + private static final String XPATH_OEMBED_SERVICES = + SVCS_SERVICE + "[dcterms:conformsTo/@rdf:resource = \"" + OEMBED_NAMESPACE + "\"]"; + private static final String XPATH_OEMBED_WEB_RESOURCES = EDM_WEBRESOURCE + + "[svcs:has_service/@rdf:resource = " + XPATH_OEMBED_SERVICES + "/@rdf:about]"; + private static final String XPATH_IS_OEMBED_RESOURCE_CONDITION = "[. 
= " + + XPATH_OEMBED_WEB_RESOURCES + "/@rdf:about]"; + private static final String OEMBED_XPATH_CONDITION_IS_SHOWN_BY = + EDM_IS_SHOWN_BY + XPATH_IS_OEMBED_RESOURCE_CONDITION; + private static final String OEMBED_XPATH_CONDITION_HAS_VIEW = + EDM_HAS_VIEW + XPATH_IS_OEMBED_RESOURCE_CONDITION; + + private final XPathExpressionWrapper getObjectExpression = new XPathExpressionWrapper(xPath -> xPath.compile(EDM_OBJECT)); + private final XPathExpressionWrapper getHasViewExpression = new XPathExpressionWrapper(xPath -> xPath.compile(EDM_HAS_VIEW)); + private final XPathExpressionWrapper getIsShownAtExpression = new XPathExpressionWrapper( + xPath -> xPath.compile(EDM_IS_SHOWN_AT)); + private final XPathExpressionWrapper getIsShownByExpression = new XPathExpressionWrapper( + xPath -> xPath.compile(EDM_IS_SHOWN_BY)); + private final XPathExpressionWrapper getOEmbedExpression = new XPathExpressionWrapper( + xPath -> xPath.compile(OEMBED_XPATH_CONDITION_HAS_VIEW + " | " + OEMBED_XPATH_CONDITION_IS_SHOWN_BY)); + + private final IBindingFactory rdfBindingFactory; + + /** + * Default constructor + */ + public RdfDeserializer() { + this(RDF.class); + } + + /** + * Constructor supplying class type for the binding factory. + *

Currently this constructor is used to assist testing.

+ * + * @param classType the class object type + * @param the class type + */ + RdfDeserializer(Class classType) { + try { + rdfBindingFactory = BindingDirectory.getFactory(classType); + } catch (JiBXException e) { + throw new IllegalStateException("No binding factory available.", e); + } + } + + /** + * Convert a UTF-8 encoded XML to {@link RDF} + * + * @param xml the xml string + * @return the RDF object + * @throws RdfDeserializationException if during unmarshalling there is a failure + */ + public RDF deserialize(String xml) throws RdfDeserializationException { + try (final InputStream inputStream = new ByteArrayInputStream( + xml.getBytes(StandardCharsets.UTF_8))) { + return deserialize(inputStream); + } catch (IOException e) { + throw new RdfDeserializationException("Unexpected issue with byte stream.", e); + } + } + + /** + * Convert a UTF-8 encoded XML to {@link RDF} + * + * @param inputStream The xml. The stream is not closed. + * @return the RDF object + * @throws RdfDeserializationException if during unmarshalling there is a failure + */ + public RDF deserialize(InputStream inputStream) throws RdfDeserializationException { + try { + final IUnmarshallingContext context = rdfBindingFactory.createUnmarshallingContext(); + return (RDF) context.unmarshalDocument(inputStream, UTF8); + } catch (JiBXException e) { + throw new RdfDeserializationException( + "Something went wrong with converting to or from the RDF format.", e); + } + } + + public Document deserializeToDocument(InputStream inputStream) throws RdfDeserializationException { + + // Parse document to schema-agnostic XML document (but make parsing namespace-aware). 
+ try { + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + factory.setNamespaceAware(true); + return factory.newDocumentBuilder().parse(inputStream); + } catch (SAXException | IOException | ParserConfigurationException e) { + throw new RdfDeserializationException("Problem with deserializing record to XML document.", e); + } + } + + public RdfResourceEntry getMainThumbnailResource(byte[] input) throws RdfDeserializationException { + return performDeserialization(input, this::getMainThumbnailResource); + } + + public RdfResourceEntry getMainThumbnailResource(InputStream inputStream) + throws RdfDeserializationException { + return getMainThumbnailResource(deserializeToDocument(inputStream)).orElse(null); + } + + public Optional getMainThumbnailResource(Document document) + throws RdfDeserializationException { + + // Get the entries of the required types. + final Map resourceEntries = getResourceEntries(document, + Collections.singleton(UrlType.URL_TYPE_FOR_MAIN_THUMBNAIL_RESOURCE)); + + // If there is not exactly one, we return an empty optional. + if (resourceEntries.size() != 1) { + return Optional.empty(); + } + + // So there is exactly one. Convert and return. + return Optional.of(convertToResourceEntries(resourceEntries).get(0)); + } + + public List convertToResourceEntries( + Map urlWithTypes) { + return urlWithTypes.entrySet().stream().map(RdfDeserializer::convertToResourceEntry) + .toList(); + } + + private static RdfResourceEntry convertToResourceEntry(Map.Entry entry) { + return new RdfResourceEntry(entry.getKey(), entry.getValue().urlTypes(), + entry.getValue().configuredForOembed()); + } + + /** + * Gets resource entries. 
+ * + * @param document the document + * @param allowedUrlTypes the allowed url types + * @return the resource entries + * @throws RdfDeserializationException the rdf deserialization exception + */ + public Map getResourceEntries(Document document, + Set allowedUrlTypes) throws RdfDeserializationException { + + // Get the resources and their types. + final Map> urls = new HashMap<>(); + for (UrlType type : allowedUrlTypes) { + final Set urlsForType = getUrls(document, type); + for (String url : urlsForType) { + urls.computeIfAbsent(url, k -> new HashSet<>()).add(type); + } + } + + // For each resource, check whether they are configured for oEmbed. + final Map result = HashMap.newHashMap(urls.size()); + final Set oEmbedUrls = getOEmbedUrls(document); + for (Entry> entry : urls.entrySet()) { + boolean isConfiguredForOembed = oEmbedUrls.contains(entry.getKey()); + result.put(entry.getKey(), new ResourceInfo(entry.getValue(), isConfiguredForOembed)); + } + + // Done + return result; + } + + private Set getUrls(Document document, UrlType type) throws RdfDeserializationException { + + // Determine the right expression to apply. + final XPathExpressionWrapper expression = + switch (type) { + case OBJECT -> getObjectExpression; + case HAS_VIEW -> getHasViewExpression; + case IS_SHOWN_AT -> getIsShownAtExpression; + case IS_SHOWN_BY -> getIsShownByExpression; + }; + + // Evaluate the expression and convert the node list to a set of attribute values. 
+ final NodeList nodes = expression.evaluate(document); + return IntStream.range(0, nodes.getLength()).mapToObj(nodes::item).map(Node::getNodeValue) + .collect(Collectors.toSet()); + } + + private Set getOEmbedUrls(Document document) throws RdfDeserializationException { + final NodeList oEmbedNodes = getOEmbedExpression.evaluate(document); + return IntStream.range(0, oEmbedNodes.getLength()) + .mapToObj(oEmbedNodes::item) + .map(Node::getNodeValue) + .collect(Collectors.toSet()); + } + + public R performDeserialization(byte[] input, DeserializationOperation operation) + throws RdfDeserializationException { + try (InputStream inputStream = new ByteArrayInputStream(input)) { + return operation.performDeserialization(inputStream); + } catch (IOException e) { + throw new RdfDeserializationException("Problem with reading byte array - Shouldn't happen.", e); + } + } + +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/RdfSerializer.java b/src/main/java/eu/europeana/metis/schema/convert/RdfSerializer.java new file mode 100644 index 0000000..24022e2 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/RdfSerializer.java @@ -0,0 +1,146 @@ +package eu.europeana.metis.schema.convert; + +import eu.europeana.metis.schema.convert.model.RdfSerializationException; +import eu.europeana.metis.schema.convert.model.RdfXmlElementMetadata; +import eu.europeana.metis.schema.jibx.RDF; +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import org.jibx.runtime.BindingDirectory; +import org.jibx.runtime.IBindingFactory; +import org.jibx.runtime.IMarshallingContext; +import org.jibx.runtime.JiBXException; + +public class RdfSerializer { + + @SuppressWarnings("java:S5852") //This regex is 
safe, and it's only meant for internal use without use input + private static final Pattern complexTypePattern = Pattern.compile("^\\{(.*)}:(.*)$"); + private static final String UTF8 = StandardCharsets.UTF_8.name(); + private static final int INDENTATION_SPACE = 2; + private final IBindingFactory rdfBindingFactory; + private final Map rdfXmlElementMetadataMap; + + /** + * Default constructor + */ + public RdfSerializer() { + this(RDF.class); + } + + /** + * Constructor supplying class type for the binding factory. + *

Currently this constructor is used to assist testing.

+ * + * @param classType the class object type + * @param the class type + */ + RdfSerializer(Class classType) { + try { + rdfBindingFactory = BindingDirectory.getFactory(classType); + rdfXmlElementMetadataMap = initializeRdfXmlElementMetadataMap(); + } catch (JiBXException e) { + throw new IllegalStateException("No binding factory available.", e); + } + } + + /** + * Convert an {@link RDF} to a UTF-8 encoded XML + * + * @param rdf The RDF object to convert + * @return An XML string representation of the RDF object + * @throws RdfSerializationException if during marshalling there is a failure + */ + public String serialize(RDF rdf) throws RdfSerializationException { + try { + return new String(convertRdfToBytes(rdf), UTF8); + } catch (UnsupportedEncodingException e) { + throw new IllegalStateException("Unexpected exception - should not occur.", e); + } + } + + /** + * Convert an {@link RDF} to UTF-8 encoded XML bytes, indented with {@code INDENTATION_SPACE} spaces. + * + * @param rdf The RDF object to convert + * @return The UTF-8 encoded XML bytes representing the RDF object + * @throws RdfSerializationException if during marshalling there is a failure + */ + public byte[] convertRdfToBytes(RDF rdf) throws RdfSerializationException { + try { + IMarshallingContext context = rdfBindingFactory.createMarshallingContext(); + context.setIndent(INDENTATION_SPACE); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + context.marshalDocument(rdf, UTF8, null, out); + return out.toByteArray(); + } catch (JiBXException e) { + throw new RdfSerializationException( + "Something went wrong with converting to or from the RDF format.", e); + } + } + + /** + * Get the xml representation of a class that will contain the namespace prefix and the element name. E.g. dc:subject + *

This method uses the internal map of element metadata that is generated from the classes mapped by the RDF jibx binding factory

+ * + * @param objectClass the jibx object class to search for + * @return the xml representation + */ + public String getQualifiedElementNameForClass(Class objectClass) { + final RdfXmlElementMetadata rdfXmlElementMetadata = rdfXmlElementMetadataMap.get(objectClass.getCanonicalName()); + Objects.requireNonNull(rdfXmlElementMetadata, + String.format("Element metadata not found for class: %s", objectClass.getCanonicalName())); + return String.format("%s:%s", rdfXmlElementMetadata.prefix(), rdfXmlElementMetadata.name()); + } + + /** + * Collect all information that we can get for jibx classes from the {@link IBindingFactory}. + */ + private Map initializeRdfXmlElementMetadataMap() { + Map elementMetadataMap = new HashMap<>(); + for (int i = 0; i < rdfBindingFactory.getMappedClasses().length; i++) { + final String canonicalName; + final String elementNamespace; + final String elementName; + final Matcher matcher = complexTypePattern.matcher(rdfBindingFactory.getMappedClasses()[i]); + if (matcher.matches()) { + //Complex type search + elementNamespace = matcher.group(1); + elementName = matcher.group(2); + final Pattern canonicalClassNamePattern = Pattern.compile(String.format("^(.*)\\.(%s)$", elementName)); + canonicalName = Arrays.stream(rdfBindingFactory.getAbstractMappings()).flatMap(Arrays::stream) + .filter(Objects::nonNull) + .filter(input -> canonicalClassNamePattern.matcher(input).matches()) + .findFirst().orElse(null); + } else { + //Simple type search + elementNamespace = rdfBindingFactory.getElementNamespaces()[i]; + elementName = rdfBindingFactory.getElementNames()[i]; + canonicalName = rdfBindingFactory.getMappedClasses()[i]; + } + checkAndStoreMetadataInMap(elementMetadataMap, canonicalName, elementNamespace, elementName); + } + return elementMetadataMap; + } + + private void checkAndStoreMetadataInMap(final Map rdfXmlElementMetadataMap, + String canonicalName, String elementNamespace, String elementName) { + //Store only if we could find the canonical 
name properly + if (canonicalName != null) { + final int namespaceIndex = IntStream.range(0, rdfBindingFactory.getNamespaces().length) + .filter(j -> rdfBindingFactory.getNamespaces()[j].equals(elementNamespace)) + .findFirst().orElseThrow(); + final String prefix = rdfBindingFactory.getPrefixes()[namespaceIndex]; + final RdfXmlElementMetadata rdfXmlElementMetadata = new RdfXmlElementMetadata(canonicalName, prefix, elementNamespace, + elementName); + rdfXmlElementMetadataMap.put(rdfXmlElementMetadata.canonicalClassName(), rdfXmlElementMetadata); + } + } + +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/SerializationException.java b/src/main/java/eu/europeana/metis/schema/convert/SerializationException.java index 77efce3..f4b6e0c 100644 --- a/src/main/java/eu/europeana/metis/schema/convert/SerializationException.java +++ b/src/main/java/eu/europeana/metis/schema/convert/SerializationException.java @@ -2,6 +2,7 @@ /** * Exception that marks a failure in serializing or deserializing. + * @deprecated use {@code RdfSerializationException} and/or {@code RdfDeserializationException} (package {@code eu.europeana.metis.schema.convert.model}). */ public class SerializationException extends Exception { diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/AbstractThreadSafeWrapper.java b/src/main/java/eu/europeana/metis/schema/convert/model/AbstractThreadSafeWrapper.java new file mode 100644 index 0000000..8cd90fb --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/AbstractThreadSafeWrapper.java @@ -0,0 +1,64 @@ +package eu.europeana.metis.schema.convert.model; + +/** + * This class wraps an element and makes it available in a thread-safe way (by synchronizing + * access). It also performs lazy creation: the object is assumed to be 'expensive' to create and + * this will only be done if it is needed. + * + * @param The type of the object. + * @param The type of the exception that may be thrown during creation and/or processing. 
+ */ +public abstract class AbstractThreadSafeWrapper { + + private final ThrowingSupplier objectCreator; + private T wrappedObject; + + /** + * Constructor. + * + * @param objectCreator The supplier of the object. + */ + protected AbstractThreadSafeWrapper(ThrowingSupplier objectCreator) { + this.objectCreator = objectCreator; + } + + /** + * Provides access to the object. + * + * @param processor The operation that needs to be executed on the object. + * @param The output/result type of the operation. + * @return The output/result of the operation. + * @throws E In case there was a problem. + */ + protected O process(ThrowingFunction processor) throws E { + synchronized (this) { + if (wrappedObject == null) { + wrappedObject = objectCreator.get(); + } + return processor.apply(wrappedObject); + } + } + + @FunctionalInterface + public interface ThrowingSupplier { + + /** + * Supply the value. + * @return The value. + * @throws E In case something went wrong supplying the value. + */ + O get() throws E; + } + + @FunctionalInterface + public interface ThrowingFunction { + + /** + * Apply the function on the input. + * @param input The input. + * @return The result. + * @throws E In case something went wrong applying the function to the input. + */ + O apply(I input) throws E; + } +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/DeserializationOperation.java b/src/main/java/eu/europeana/metis/schema/convert/model/DeserializationOperation.java new file mode 100644 index 0000000..80db4ad --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/DeserializationOperation.java @@ -0,0 +1,15 @@ +package eu.europeana.metis.schema.convert.model; + +import java.io.InputStream; + +@FunctionalInterface +public interface DeserializationOperation { + /** + * Perform deserialization r. 
+ * + * @param inputStream the input stream + * @return the r + * @throws RdfDeserializationException the rdf deserialization exception + */ + R performDeserialization(InputStream inputStream) throws RdfDeserializationException; +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfDeserializationException.java b/src/main/java/eu/europeana/metis/schema/convert/model/RdfDeserializationException.java new file mode 100644 index 0000000..5eda113 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfDeserializationException.java @@ -0,0 +1,22 @@ +package eu.europeana.metis.schema.convert.model; + +import java.io.Serial; + +/** + * This exception represents a problem that occurred during deserialization of an RDF object. + */ +public class RdfDeserializationException extends Exception { + + /** This class implements {@link java.io.Serializable}. **/ + @Serial private static final long serialVersionUID = -789223924131348847L; + + /** + * Constructor. + * + * @param message The exception message. + * @param cause The cause. + */ + public RdfDeserializationException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfNamespaceContext.java b/src/main/java/eu/europeana/metis/schema/convert/model/RdfNamespaceContext.java new file mode 100644 index 0000000..676a1b6 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfNamespaceContext.java @@ -0,0 +1,60 @@ +package eu.europeana.metis.schema.convert.model; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import javax.xml.XMLConstants; +import javax.xml.namespace.NamespaceContext; + +/** + * This class provides a {@link NamespaceContext} implementation for RDF documents that can be used + * when parsing or compiling (serializing or deserializing) XML documents. 
+ */ +public class RdfNamespaceContext implements NamespaceContext { + + public static final String RDF_NAMESPACE_PREFIX = "rdf"; + public static final String EDM_NAMESPACE_PREFIX = "edm"; + public static final String ORE_NAMESPACE_PREFIX = "ore"; + public static final String SVCS_NAMESPACE_PREFIX = "svcs"; + public static final String DCTERMS_NAMESPACE_PREFIX = "dcterms"; + + private static final Map PREFIX_TO_NAMESPACE_MAP = new HashMap<>(); + + static { + PREFIX_TO_NAMESPACE_MAP.put(XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI); + PREFIX_TO_NAMESPACE_MAP.put(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI); + PREFIX_TO_NAMESPACE_MAP + .put(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI); + PREFIX_TO_NAMESPACE_MAP.put(RDF_NAMESPACE_PREFIX, "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + PREFIX_TO_NAMESPACE_MAP.put(ORE_NAMESPACE_PREFIX, "http://www.openarchives.org/ore/terms/"); + PREFIX_TO_NAMESPACE_MAP.put(EDM_NAMESPACE_PREFIX, "http://www.europeana.eu/schemas/edm/"); + PREFIX_TO_NAMESPACE_MAP.put(SVCS_NAMESPACE_PREFIX,"http://rdfs.org/sioc/services#"); + PREFIX_TO_NAMESPACE_MAP.put(DCTERMS_NAMESPACE_PREFIX, "http://purl.org/dc/terms/"); + } + + @Override + public String getNamespaceURI(String s) { + if (s == null) { + throw new IllegalArgumentException(); + } + return Optional.ofNullable(PREFIX_TO_NAMESPACE_MAP.get(s)).orElse(XMLConstants.NULL_NS_URI); + } + + @Override + public String getPrefix(String s) { + if (s == null) { + throw new IllegalArgumentException(); + } + return PREFIX_TO_NAMESPACE_MAP.entrySet().stream().filter(entry -> entry.getValue().equals(s)) + .map(Entry::getKey).findAny().orElse(null); + } + + @Override + public Iterator getPrefixes(String s) { + return Optional.ofNullable(getPrefix(s)).map(Collections::singletonList) + .orElseGet(Collections::emptyList).iterator(); + } +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfResourceEntry.java 
b/src/main/java/eu/europeana/metis/schema/convert/model/RdfResourceEntry.java new file mode 100644 index 0000000..7d188de --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfResourceEntry.java @@ -0,0 +1,64 @@ +package eu.europeana.metis.schema.convert.model; + +import java.io.Serial; +import java.io.Serializable; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * This object represents a resource entry in an RDF file. It contains the resource URL, and a list of reference types: the way + * this resource is referenced from within the RDF. + */ +public class RdfResourceEntry implements Serializable { + + /** + * Implements {@link Serializable} + **/ + @Serial private static final long serialVersionUID = -5873067668837140080L; + + private String resourceUrl; + private Set urlTypes; + + private boolean resourceConfiguredForOembed; + + /** + * Constructor. + * + * @param resourceUrl The URL of the resource. + * @param urlTypes The resource URL types with which this resource is referenced. + * @param resourceConfiguredForOembed If the resource is configured in the record as if it were + * an oEmbed resource. + */ + public RdfResourceEntry(String resourceUrl, Collection urlTypes, + boolean resourceConfiguredForOembed) { + this.resourceUrl = resourceUrl; + this.urlTypes = new HashSet<>(urlTypes); + this.resourceConfiguredForOembed = resourceConfiguredForOembed; + } + + /** + * Constructor. Don't use this: it's required for deserialization. 
+ */ + RdfResourceEntry() { + } + + public String getResourceUrl() { + return resourceUrl; + } + + public Set getUrlTypes() { + return Collections.unmodifiableSet(urlTypes); + } + + public boolean isResourceConfiguredForOembed() { + return resourceConfiguredForOembed; + } + + @Override + public String toString() { + return String.format("%s{resourceUrl=%s, urlTypes=%s, oembed=%s}", + RdfResourceEntry.class.getSimpleName(), resourceUrl, urlTypes, resourceConfiguredForOembed); + } +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfSerializationException.java b/src/main/java/eu/europeana/metis/schema/convert/model/RdfSerializationException.java new file mode 100644 index 0000000..0c2bac7 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfSerializationException.java @@ -0,0 +1,22 @@ +package eu.europeana.metis.schema.convert.model; + +import java.io.Serial; + +/** + * This exception represents a problem that occurred during serialization of an RDF object. + */ +public class RdfSerializationException extends Exception { + + /** This class implements {@link java.io.Serializable}. **/ + @Serial private static final long serialVersionUID = 1031549407979593963L; + + /** + * Constructor. + * + * @param message The exception message. + * @param cause The cause. 
+ */ + public RdfSerializationException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfXmlElementMetadata.java b/src/main/java/eu/europeana/metis/schema/convert/model/RdfXmlElementMetadata.java new file mode 100644 index 0000000..627ef96 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfXmlElementMetadata.java @@ -0,0 +1,4 @@ +package eu.europeana.metis.schema.convert.model; + +public record RdfXmlElementMetadata(String canonicalClassName, String prefix, String namespace, String name) { +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/RdfXpathConstants.java b/src/main/java/eu/europeana/metis/schema/convert/model/RdfXpathConstants.java new file mode 100644 index 0000000..0e56b11 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/RdfXpathConstants.java @@ -0,0 +1,19 @@ +package eu.europeana.metis.schema.convert.model; + +/** + * Rdf xpath string constants. 
+ */ +public final class RdfXpathConstants { + + public static final String RDF_NAMESPACE = "/rdf:RDF"; + public static final String ORE_AGGREGATION = RDF_NAMESPACE + "/ore:Aggregation"; + public static final String EDM_OBJECT = ORE_AGGREGATION + "/edm:object/@rdf:resource"; + public static final String EDM_IS_SHOWN_BY = ORE_AGGREGATION + "/edm:isShownBy/@rdf:resource"; + public static final String EDM_HAS_VIEW = ORE_AGGREGATION + "/edm:hasView/@rdf:resource"; + public static final String EDM_IS_SHOWN_AT = ORE_AGGREGATION + "/edm:isShownAt/@rdf:resource"; + public static final String SVCS_SERVICE = RDF_NAMESPACE + "/svcs:Service"; + public static final String EDM_WEBRESOURCE = RDF_NAMESPACE + "/edm:WebResource"; + + private RdfXpathConstants() {} + +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/ResourceInfo.java b/src/main/java/eu/europeana/metis/schema/convert/model/ResourceInfo.java new file mode 100644 index 0000000..e55df83 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/ResourceInfo.java @@ -0,0 +1,5 @@ +package eu.europeana.metis.schema.convert.model; + +import java.util.Set; + +public record ResourceInfo(Set urlTypes, boolean configuredForOembed) {} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/UrlType.java b/src/main/java/eu/europeana/metis/schema/convert/model/UrlType.java new file mode 100644 index 0000000..d1845b7 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/UrlType.java @@ -0,0 +1,32 @@ +package eu.europeana.metis.schema.convert.model; + +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; + +/** + * The resource reference types that are used in RDF files to reference resources. This list is not + * complete: it only contains those types that are considered for media processing. + */ +public enum UrlType { + + OBJECT, HAS_VIEW, IS_SHOWN_BY, IS_SHOWN_AT; + + /** + * The resource URL types that are subject to link checking. 
+ **/ + public static final Set URL_TYPES_FOR_LINK_CHECKING = Collections + .unmodifiableSet(EnumSet.allOf(UrlType.class)); + + /** + * The resource URL types that are subject to media extraction. + **/ + public static final Set URL_TYPES_FOR_MEDIA_EXTRACTION = Collections + .unmodifiableSet(EnumSet.allOf(UrlType.class)); + + /** + * The resource URL type that is subject to media extraction and provide the main thumbnail. + * This is a member of {@link #URL_TYPES_FOR_MEDIA_EXTRACTION}. + */ + public static final UrlType URL_TYPE_FOR_MAIN_THUMBNAIL_RESOURCE = UrlType.OBJECT; +} diff --git a/src/main/java/eu/europeana/metis/schema/convert/model/XPathExpressionWrapper.java b/src/main/java/eu/europeana/metis/schema/convert/model/XPathExpressionWrapper.java new file mode 100644 index 0000000..1f7e297 --- /dev/null +++ b/src/main/java/eu/europeana/metis/schema/convert/model/XPathExpressionWrapper.java @@ -0,0 +1,52 @@ +package eu.europeana.metis.schema.convert.model; + +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; + +public class XPathExpressionWrapper extends + AbstractThreadSafeWrapper { + + /** + * Instantiates a new X path expression wrapper. + * + * @param expressionCreator the expression creator + */ + public XPathExpressionWrapper( + ThrowingFunction expressionCreator) { + super(() -> { + final XPathFactory factory; + synchronized (XPathFactory.class) { + factory = XPathFactory.newInstance(); + } + final XPath xPath = factory.newXPath(); + xPath.setNamespaceContext(new RdfNamespaceContext()); + try { + return expressionCreator.apply(xPath); + } catch (XPathExpressionException e) { + throw new RdfDeserializationException("Could not initialize xpath expression.", e); + } + }); + } + + /** + * Evaluate node list. 
+ * + * @param document the document + * @return the node list + * @throws RdfDeserializationException the rdf deserialization exception + */ + public NodeList evaluate(Document document) throws RdfDeserializationException { + return process(compiledExpression -> { + try { + return (NodeList) compiledExpression.evaluate(document, XPathConstants.NODESET); + } catch (XPathExpressionException e) { + throw new RdfDeserializationException("Problem with deserializing RDF.", e); + } + }); + } +}