diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java index 7b4440a4..00010d46 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -36,7 +36,7 @@ * @author Pascal Christoph (dr0i) dug it up again */ -@Description("Encodes a stream into MARCXML.") +@Description("Encodes a stream into MARCXML. If you can't ensure valid MARC21 (e.g. the leader isn't correct or not set as one literal) then set the parameter `ensureCorrectMarc21Xml` to `true`.") @In(StreamReceiver.class) @Out(String.class) @FluxCommand("encode-marcxml") @@ -47,6 +47,7 @@ public final class MarcXmlEncoder extends DefaultStreamPipe"; private static final String ROOT_CLOSE = ""; @@ -104,27 +105,23 @@ public String close(final Object[] args) { private static final int TAG_BEGIN = 0; private static final int TAG_END = 3; - private final StringBuilder builder = new StringBuilder(); + private final Encoder encoder = new Encoder(); + private final Marc21Decoder decoder = new Marc21Decoder(); + private final Marc21Encoder wrapper = new Marc21Encoder(); - private boolean atStreamStart = true; - - private boolean omitXmlDeclaration = OMIT_XML_DECLARATION; - private String xmlVersion = XML_VERSION; - private String xmlEncoding = XML_ENCODING; - - private String currentEntity = ""; - - private boolean emitNamespace = true; - private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; - - private int indentationLevel; - private boolean formatted = PRETTY_PRINTED; - private int recordAttributeOffset; + private DefaultStreamPipe> pipe; /** * Creates an instance of {@link MarcXmlEncoder}. */ public MarcXmlEncoder() { + decoder.setEmitLeaderAsWhole(true); + + wrapper + .setReceiver(decoder) + .setReceiver(encoder); + + setEnsureCorrectMarc21Xml(ENSURE_CORRECT_MARC21_XML); } /** @@ -134,8 +131,7 @@ public MarcXmlEncoder() { * @param emitNamespace true if the namespace is emitted, otherwise false */ public void setEmitNamespace(final boolean emitNamespace) { - this.emitNamespace = emitNamespace; - namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; + encoder.setEmitNamespace(emitNamespace); } /** @@ -147,7 +143,7 @@ public void setEmitNamespace(final boolean emitNamespace) { * false */ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { - omitXmlDeclaration = currentOmitXmlDeclaration; + encoder.omitXmlDeclaration(currentOmitXmlDeclaration); } /** @@ -158,7 +154,7 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { * @param xmlVersion the XML version */ public void setXmlVersion(final String xmlVersion) { - this.xmlVersion = xmlVersion; + encoder.setXmlVersion(xmlVersion); } /** @@ -169,7 +165,21 @@ public void setXmlVersion(final String xmlVersion) { * @param xmlEncoding the XML encoding */ public void setXmlEncoding(final String xmlEncoding) { - this.xmlEncoding = xmlEncoding; + encoder.setXmlEncoding(xmlEncoding); + } + + /** + * Sets to ensure correct MARC21 XML. + * If true, the input data is validated to ensure correct MARC21. Also the leader may be generated. + * It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}. + * This validation and treatment of the leader is more safe but comes with a performance impact. + * + * Default value: {@value #ENSURE_CORRECT_MARC21_XML} + * + * @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. Also the leader may be generated. + */ + public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) { + pipe = ensureCorrectMarc21Xml ? wrapper : encoder; } /** @@ -180,189 +190,291 @@ public void setXmlEncoding(final String xmlEncoding) { * @param formatted true if formatting is activated, otherwise false */ public void setFormatted(final boolean formatted) { - this.formatted = formatted; + encoder.setFormatted(formatted); } @Override public void startRecord(final String identifier) { - if (atStreamStart) { - if (!omitXmlDeclaration) { - writeHeader(); - prettyPrintNewLine(); - } - writeTag(Tag.collection::open, emitNamespace ? NAMESPACE_SUFFIX : EMPTY, emitNamespace ? SCHEMA_ATTRIBUTES : EMPTY); - prettyPrintNewLine(); - incrementIndentationLevel(); - } - atStreamStart = false; - - prettyPrintIndentation(); - writeTag(Tag.record::open); - recordAttributeOffset = builder.length() - 1; - prettyPrintNewLine(); - - incrementIndentationLevel(); + pipe.startRecord(identifier); } @Override public void endRecord() { - decrementIndentationLevel(); - prettyPrintIndentation(); - writeTag(Tag.record::close); - prettyPrintNewLine(); - sendAndClearData(); + pipe.endRecord(); } @Override public void startEntity(final String name) { - currentEntity = name; - if (!name.equals(Marc21EventNames.LEADER_ENTITY)) { - if (name.length() != LEADER_ENTITY_LENGTH) { - final String message = String.format("Entity too short." + "Got a string ('%s') of length %d." + - "Expected a length of " + LEADER_ENTITY_LENGTH + " (field + indicators).", name, name.length()); - throw new MetafactureException(message); + pipe.startEntity(name); + } + + @Override + public void endEntity() { + pipe.endEntity(); + } + + @Override + public void literal(final String name, final String value) { + pipe.literal(name, value); + } + + @Override + protected void onResetStream() { + pipe.resetStream(); + } + + @Override + protected void onCloseStream() { + pipe.closeStream(); + } + + @Override + protected void onSetReceiver() { + encoder.setReceiver(getReceiver()); + } + + private static class Encoder extends DefaultStreamPipe> { + + private final StringBuilder builder = new StringBuilder(); + private final StringBuilder leaderBuilder = new StringBuilder(); + + private boolean atStreamStart = true; + + private boolean omitXmlDeclaration = OMIT_XML_DECLARATION; + private String xmlVersion = XML_VERSION; + private String xmlEncoding = XML_ENCODING; + + private String currentEntity = ""; + + private boolean emitNamespace = true; + private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; + + private int indentationLevel; + private boolean formatted = PRETTY_PRINTED; + private int recordAttributeOffset; + + private Encoder() { + } + + public void setEmitNamespace(final boolean emitNamespace) { + this.emitNamespace = emitNamespace; + namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; + } + + public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { + omitXmlDeclaration = currentOmitXmlDeclaration; + } + + public void setXmlVersion(final String xmlVersion) { + this.xmlVersion = xmlVersion; + } + + public void setXmlEncoding(final String xmlEncoding) { + this.xmlEncoding = xmlEncoding; + } + + public void setFormatted(final boolean formatted) { + this.formatted = formatted; + } + + @Override + public void startRecord(final String identifier) { + if (atStreamStart) { + if (!omitXmlDeclaration) { + writeHeader(); + prettyPrintNewLine(); + } + writeTag(Tag.collection::open, emitNamespace ? NAMESPACE_SUFFIX : EMPTY, emitNamespace ? SCHEMA_ATTRIBUTES : EMPTY); + prettyPrintNewLine(); + incrementIndentationLevel(); } + atStreamStart = false; - final String tag = name.substring(TAG_BEGIN, TAG_END); - final String ind1 = name.substring(IND1_BEGIN, IND1_END); - final String ind2 = name.substring(IND2_BEGIN, IND2_END); prettyPrintIndentation(); - writeTag(Tag.datafield::open, tag, ind1, ind2); + writeTag(Tag.record::open); + recordAttributeOffset = builder.length() - 1; prettyPrintNewLine(); + incrementIndentationLevel(); } - } - @Override - public void endEntity() { - if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) { + @Override + public void endRecord() { + if (leaderBuilder.length() > 0) { + writeLeader(); + } decrementIndentationLevel(); prettyPrintIndentation(); - writeTag(Tag.datafield::close); + writeTag(Tag.record::close); prettyPrintNewLine(); + sendAndClearData(); } - currentEntity = ""; - } - @Override - public void literal(final String name, final String value) { - if ("".equals(currentEntity)) { - if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) { - if (value != null) { - builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value)); + @Override + public void startEntity(final String name) { + currentEntity = name; + if (!name.equals(Marc21EventNames.LEADER_ENTITY)) { + if (name.length() != LEADER_ENTITY_LENGTH) { + final String message = String.format("Entity too short." + "Got a string ('%s') of length %d." + + "Expected a length of " + LEADER_ENTITY_LENGTH + " (field + indicators).", name, name.length()); + throw new MetafactureException(message); } + + final String tag = name.substring(TAG_BEGIN, TAG_END); + final String ind1 = name.substring(IND1_BEGIN, IND1_END); + final String ind2 = name.substring(IND2_BEGIN, IND2_END); + prettyPrintIndentation(); + writeTag(Tag.datafield::open, tag, ind1, ind2); + prettyPrintNewLine(); + incrementIndentationLevel(); } - else if (!writeLeader(name, value)) { + } + + @Override + public void endEntity() { + if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) { + decrementIndentationLevel(); prettyPrintIndentation(); - writeTag(Tag.controlfield::open, name); - if (value != null) { - writeEscaped(value.trim()); + writeTag(Tag.datafield::close); + prettyPrintNewLine(); + } + currentEntity = ""; + } + + @Override + public void literal(final String name, final String value) { + if ("".equals(currentEntity)) { + if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) { + if (value != null) { + builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value)); + } + } + else if (!appendLeader(name, value)) { + prettyPrintIndentation(); + writeTag(Tag.controlfield::open, name); + if (value != null) { + writeEscaped(value.trim()); + } + writeTag(Tag.controlfield::close); + prettyPrintNewLine(); } - writeTag(Tag.controlfield::close); + } + else if (!appendLeader(currentEntity, value)) { + prettyPrintIndentation(); + writeTag(Tag.subfield::open, name); + writeEscaped(value.trim()); + writeTag(Tag.subfield::close); prettyPrintNewLine(); } } - else if (!writeLeader(currentEntity, value)) { - prettyPrintIndentation(); - writeTag(Tag.subfield::open, name); - writeEscaped(value.trim()); - writeTag(Tag.subfield::close); - prettyPrintNewLine(); + + @Override + protected void onResetStream() { + if (!atStreamStart) { + writeFooter(); + } + sendAndClearData(); + atStreamStart = true; } - } - @Override - protected void onResetStream() { - if (!atStreamStart) { + @Override + protected void onCloseStream() { writeFooter(); + sendAndClearData(); + indentationLevel = 0; + atStreamStart = true; } - sendAndClearData(); - indentationLevel = 0; - atStreamStart = true; - } - @Override - protected void onCloseStream() { - writeFooter(); - sendAndClearData(); - } + /** Increments the indentation level by one */ + private void incrementIndentationLevel() { + indentationLevel += 1; + } - /** Increments the indentation level by one */ - private void incrementIndentationLevel() { - indentationLevel += 1; - } + /** Decrements the indentation level by one */ + private void decrementIndentationLevel() { + indentationLevel -= 1; + } - /** Decrements the indentation level by one */ - private void decrementIndentationLevel() { - indentationLevel -= 1; - } + /** Adds a XML Header */ + private void writeHeader() { + writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); + } - /** Adds a XML Header */ - private void writeHeader() { - writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); - } + /** Closes the root tag */ + private void writeFooter() { + writeTag(Tag.collection::close); + } - /** Closes the root tag */ - private void writeFooter() { - writeTag(Tag.collection::close); - } + /** + * Writes an unescaped sequence. + * + * @param str the unescaped sequence to be written + */ + private void writeRaw(final String str) { + builder.append(str); + } - /** - * Writes an unescaped sequence. - * - * @param str the unescaped sequence to be written - */ - private void writeRaw(final String str) { - builder.append(str); - } + /** + * Writes an unescaped sequence to the leader literal. + * + * @param str the unescaped sequence to be written + */ + private void appendLeader(final String str) { + leaderBuilder.append(str); + } - /** - * Writes an escaped sequence. - * - * @param str the unescaped sequence to be written - */ - private void writeEscaped(final String str) { - builder.append(XmlUtil.escape(str, false)); - } + private boolean appendLeader(final String name, final String value) { + if (name.equals(Marc21EventNames.LEADER_ENTITY)) { + appendLeader(value); + return true; + } + else { + return false; + } + } - private boolean writeLeader(final String name, final String value) { - if (name.equals(Marc21EventNames.LEADER_ENTITY)) { + /** + * Writes an escaped sequence. + * + * @param str the unescaped sequence to be written + */ + private void writeEscaped(final String str) { + builder.append(XmlUtil.escape(str, false)); + } + + private void writeLeader() { prettyPrintIndentation(); writeTag(Tag.leader::open); - writeRaw(value); + writeRaw(leaderBuilder.toString()); writeTag(Tag.leader::close); prettyPrintNewLine(); - - return true; } - else { - return false; + + private void writeTag(final Function function, final Object... args) { + final Object[] allArgs = Arrays.copyOf(namespacePrefix, namespacePrefix.length + args.length); + System.arraycopy(args, 0, allArgs, namespacePrefix.length, args.length); + writeRaw(function.apply(allArgs)); } - } - private void writeTag(final Function function, final Object... args) { - final Object[] allArgs = Arrays.copyOf(namespacePrefix, namespacePrefix.length + args.length); - System.arraycopy(args, 0, allArgs, namespacePrefix.length, args.length); - writeRaw(function.apply(allArgs)); - } + private void prettyPrintIndentation() { + if (formatted) { + final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); + builder.append(prefix); + } + } - private void prettyPrintIndentation() { - if (formatted) { - final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); - builder.append(prefix); + private void prettyPrintNewLine() { + if (formatted) { + builder.append(NEW_LINE); + } } - } - private void prettyPrintNewLine() { - if (formatted) { - builder.append(NEW_LINE); + private void sendAndClearData() { + getReceiver().process(builder.toString()); + builder.delete(0, builder.length()); + recordAttributeOffset = 0; } - } - private void sendAndClearData() { - getReceiver().process(builder.toString()); - builder.delete(0, builder.length()); - recordAttributeOffset = 0; } } diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java index bc6fb0d4..7d7b9736 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java @@ -16,14 +16,16 @@ package org.metafacture.biblio.marc21; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.metafacture.framework.FormatException; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.MissingIdException; +import org.metafacture.framework.helpers.DefaultObjectReceiver; -import org.junit.After; import org.junit.Before; import org.junit.Test; -import org.metafacture.framework.MetafactureException; -import org.metafacture.framework.helpers.DefaultObjectReceiver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Tests for class {@link MarcXmlEncoder}. @@ -47,7 +49,7 @@ public class MarcXmlEncoderTest { private static final String XML_MARC_COLLECTION_END_TAG = ""; private static final String RECORD_ID = "92005291"; - private static StringBuilder resultCollector; + private static StringBuilder resultCollector = new StringBuilder(); private static MarcXmlEncoder encoder; @Before @@ -60,14 +62,11 @@ public void process(final String obj) { resultCollector.append(obj); } }); - resultCollector = new StringBuilder(); - } - @After - public void tearDown() { + resultCollector.delete(0, resultCollector.length()); } - private void addOneRecord(MarcXmlEncoder encoder) { + private void addOneRecord(final MarcXmlEncoder encoder) { encoder.startRecord(RECORD_ID); encoder.literal("001", RECORD_ID); encoder.startEntity("010 "); @@ -187,7 +186,17 @@ public void emitExceptionWhenEntityLengthNot5() { } @Test - public void createAnRecordWithLeader() { + public void createAnRecordWithLeader(){ + createAnRecordWithLeader(encoder); + } + + @Test(expected = FormatException.class) + public void createAnRecordWithLeader_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + createAnRecordWithLeader(encoder); + } + + private void createAnRecordWithLeader(final MarcXmlEncoder encoder) { encoder.startRecord("1"); encoder.startEntity(Marc21EventNames.LEADER_ENTITY); encoder.literal(Marc21EventNames.LEADER_ENTITY, "dummy"); @@ -212,6 +221,61 @@ public void issue336_createRecordWithTopLevelLeader() { assertEquals(expected, actual); } + @Test + public void issue336_createRecordWithTopLevelLeader_defaultMarc21Xml() { + createRecordWithTopLevelLeader(encoder, "00000naa a2200000uc 4500"); + } + + @Test + public void issue336_createRecordWithTopLevelLeader_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + createRecordWithTopLevelLeader(encoder, "00048naa a2200037uc 4500"); + } + + private void createRecordWithTopLevelLeader(final MarcXmlEncoder encoder, final String expectedLeader) { + encoder.startRecord("1"); + encoder.literal("001", "8u3287432"); + encoder.literal(Marc21EventNames.LEADER_ENTITY, "00000naa a2200000uc 4500"); + encoder.endRecord(); + encoder.closeStream(); + String expected = XML_DECLARATION + XML_ROOT_OPEN + + "8u3287432" + + "" + expectedLeader + "" + XML_MARC_COLLECTION_END_TAG; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + + @Test + public void issue527ShouldEmitLeaderAlwaysAsWholeString() { + issue527ShouldEmitLeaderAlwaysAsWholeString(encoder); + } + + @Test(expected = MissingIdException.class) + public void issue527ShouldEmitLeaderAlwaysAsWholeString_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + issue527ShouldEmitLeaderAlwaysAsWholeString(encoder); + } + + private void issue527ShouldEmitLeaderAlwaysAsWholeString(MarcXmlEncoder encoder) { + encoder.startRecord("1"); + encoder.startEntity(Marc21EventNames.LEADER_ENTITY); + encoder.literal(Marc21EventNames.RECORD_STATUS_LITERAL, "a"); + encoder.literal(Marc21EventNames.RECORD_TYPE_LITERAL, "o"); + encoder.literal(Marc21EventNames.BIBLIOGRAPHIC_LEVEL_LITERAL, "a"); + encoder.literal(Marc21EventNames.TYPE_OF_CONTROL_LITERAL, " "); + encoder.literal(Marc21EventNames.CHARACTER_CODING_LITERAL, "a"); + encoder.literal(Marc21EventNames.ENCODING_LEVEL_LITERAL, "z"); + encoder.literal(Marc21EventNames.CATALOGING_FORM_LITERAL, "u"); + encoder.literal(Marc21EventNames.MULTIPART_LEVEL_LITERAL, " "); + encoder.endEntity(); + encoder.endRecord(); + encoder.closeStream(); + String expected = XML_DECLARATION + XML_ROOT_OPEN + + "aoa azu " + XML_MARC_COLLECTION_END_TAG; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + @Test public void sendDataAndClearWhenRecordStartedAndStreamResets() { encoder.startRecord("1");