From 9983716d7d124b90de81fb199728903b01cc4488 Mon Sep 17 00:00:00 2001 From: Sascha Bieberstein Date: Thu, 20 Jun 2024 17:25:36 +0200 Subject: [PATCH] Try extracting ZIP using ISO encoding when extraction with UTF8 encoding fails In very rare cases, customers upload archives with ISO-8859-1 encoding (probably created with some Windows tools). We now try to use the Java logic with the secondary encoding before switching to 7-Zip (which also fails in that case). Fixes: OX-10856 --- .../sirius/biz/util/ArchiveExtractor.java | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/main/java/sirius/biz/util/ArchiveExtractor.java b/src/main/java/sirius/biz/util/ArchiveExtractor.java index d14d83291..8d1a09635 100644 --- a/src/main/java/sirius/biz/util/ArchiveExtractor.java +++ b/src/main/java/sirius/biz/util/ArchiveExtractor.java @@ -32,6 +32,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; @@ -215,7 +217,11 @@ public void extract(String filename, Processor extractedFileConsumer) { try { if (isZipFile(Files.getFileExtension(filename)) || !isSevenZipEnabled()) { - extractZip(archiveFile, enhanceFileFilter(filter), extractedFileConsumer); + extractZip(archiveFile, + enhanceFileFilter(filter), + extractedFileConsumer, + StandardCharsets.UTF_8, + StandardCharsets.ISO_8859_1); } else { extract7z(archiveFile, enhanceFileFilter(filter), extractedFileConsumer); } @@ -243,10 +249,24 @@ private boolean ignoreHiddenFiles(String path) { private void extractZip(File archiveFile, Predicate filter, - Processor extractedFileConsumer) throws Exception { - try (ZipFile zipFile = new ZipFile(archiveFile)) { + Processor extractedFileConsumer, + Charset charset, + Charset fallbackCharset) throws Exception { + try (ZipFile zipFile = new ZipFile(archiveFile, 
charset)) { extractZipEntriesFromZipFile(filter, extractedFileConsumer, zipFile); } catch (ZipException zipException) { + if (fallbackCharset != null) { + // Retry extraction using the fallback charset + TaskContext.get() + .log("Cannot unzip the given archive: " + + zipException.getMessage() + + ".\nFalling back to charset: " + + fallbackCharset.displayName()); + Exceptions.ignore(zipException); + extractZip(archiveFile, filter, extractedFileConsumer, fallbackCharset, null); + return; + } + if (!isSevenZipEnabled()) { // This is most probably an error indicating an inconsistent ZIP archive. We therefore directly throw // a handled exception to avoid jamming the syslog...