From 055c5d06c22e4f665d7f9c5af0e5a781c78d0edf Mon Sep 17 00:00:00 2001 From: Matthew Blissett Date: Tue, 20 Mar 2018 17:58:15 +0100 Subject: [PATCH] DWCA schema says the default is UTF-8, if the encoding isn't provided. --- .../org/gbif/dwca/io/ArchiveFactoryTest.java | 44 ++++++++++++------- .../extension-encoding-missing/media.txt | 0 .../extension-encoding-missing/meta.xml | 0 .../meta-file-encoding-missing/meta.xml | 0 .../meta-file-encoding-missing/taxa.txt | 0 5 files changed, 27 insertions(+), 17 deletions(-) rename src/test/resources/{invalid => defaults}/extension-encoding-missing/media.txt (100%) rename src/test/resources/{invalid => defaults}/extension-encoding-missing/meta.xml (100%) rename src/test/resources/{invalid => defaults}/meta-file-encoding-missing/meta.xml (100%) rename src/test/resources/{invalid => defaults}/meta-file-encoding-missing/taxa.txt (100%) diff --git a/src/test/java/org/gbif/dwca/io/ArchiveFactoryTest.java b/src/test/java/org/gbif/dwca/io/ArchiveFactoryTest.java index f131134d1..6eb846863 100644 --- a/src/test/java/org/gbif/dwca/io/ArchiveFactoryTest.java +++ b/src/test/java/org/gbif/dwca/io/ArchiveFactoryTest.java @@ -4,6 +4,7 @@ import org.gbif.dwc.meta.DwcMetaFiles; import org.gbif.dwc.terms.DcTerm; import org.gbif.dwc.terms.DwcTerm; +import org.gbif.dwc.terms.GbifTerm; import org.gbif.dwca.record.Record; import org.gbif.dwca.record.StarRecord; import org.gbif.util.CSVReaderHelper; @@ -21,7 +22,6 @@ import java.util.List; import java.util.Set; -import com.google.common.collect.Sets; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -540,6 +540,26 @@ public void testOpenArchiveForGenericCore() throws IOException, UnsupportedArchi assertEquals(DwcLayout.FILE_ROOT, arch.getDwcLayout()); } + /** + * Basic validation of archives, where we rely on falling back to defaults from the DWC-A metadata schema. + */ + @Test + public void testFallbackToDefaultsArchives() throws IOException { + try { + Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("defaults/meta-file-encoding-missing")); + assertEquals("utf8", arch.getCore().getEncoding()); + } catch (UnsupportedArchiveException e) { + fail("Core file encoding defaults to UTF-8 if missing in meta.xml."); + } + + try { + Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("defaults/extension-encoding-missing")); + assertEquals("utf8", arch.getExtension(GbifTerm.Multimedia).getEncoding()); + } catch (UnsupportedArchiveException e) { + fail("Extension file encoding defaults to UTF-8 if missing in meta.xml."); + } + } + /** * Basic validation of archives, that the declared files exist and have basic, valid structure. */ @@ -547,43 +567,33 @@ public void testOpenArchiveForGenericCore() throws IOException, UnsupportedArchi public void testInvalidArchives() throws IOException { // Simple archive problems try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/empty")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/empty")); fail("Empty archive should not be opened."); } catch (UnsupportedArchiveException e) {} try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/meta-file-encoding-missing")); - fail("Archive with missing file encoding in meta.xml should not be opened."); - } catch (UnsupportedArchiveException e) {} - - try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/meta-file-location-missing")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/meta-file-location-missing")); fail("Archive with missing file location in meta.xml should not be opened."); } catch (UnsupportedArchiveException e) {} // Extension archive problems try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-missing")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-missing")); fail("Archive with missing extension file should not be opened."); } catch (UnsupportedArchiveException e) {} try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-encoding-missing")); - fail("Archive with missing extension file encoding in meta.xml should not be opened."); - } catch (UnsupportedArchiveException e) {} - - try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-location-missing")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-location-missing")); fail("Archive with missing extension file location in meta.xml should not be opened."); } catch (UnsupportedArchiveException e) {} try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-core-id-missing")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-core-id-missing")); fail("Archive with extension lacking coreid in meta.xml should not be opened."); } catch (UnsupportedArchiveException e) {} try { - Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-id-missing")); + ArchiveFactory.openArchive(FileUtils.getClasspathFile("invalid/extension-id-missing")); fail("Archive with extension and core missing id in meta.xml should not be opened."); } catch (UnsupportedArchiveException e) {} } diff --git a/src/test/resources/invalid/extension-encoding-missing/media.txt b/src/test/resources/defaults/extension-encoding-missing/media.txt similarity index 100% rename from src/test/resources/invalid/extension-encoding-missing/media.txt rename to src/test/resources/defaults/extension-encoding-missing/media.txt diff --git a/src/test/resources/invalid/extension-encoding-missing/meta.xml b/src/test/resources/defaults/extension-encoding-missing/meta.xml similarity index 100% rename from src/test/resources/invalid/extension-encoding-missing/meta.xml rename to src/test/resources/defaults/extension-encoding-missing/meta.xml diff --git a/src/test/resources/invalid/meta-file-encoding-missing/meta.xml b/src/test/resources/defaults/meta-file-encoding-missing/meta.xml similarity index 100% rename from src/test/resources/invalid/meta-file-encoding-missing/meta.xml rename to src/test/resources/defaults/meta-file-encoding-missing/meta.xml diff --git a/src/test/resources/invalid/meta-file-encoding-missing/taxa.txt b/src/test/resources/defaults/meta-file-encoding-missing/taxa.txt similarity index 100% rename from src/test/resources/invalid/meta-file-encoding-missing/taxa.txt rename to src/test/resources/defaults/meta-file-encoding-missing/taxa.txt