Skip to content

Commit

Permalink
Try extracting ZIP using ISO encoding when extraction with UTF8 encoding fails
Browse files Browse the repository at this point in the history

In very rare cases, customers upload archives with ISO-8859-1 encoded entry names (probably created with some Windows tools).
We now retry the Java-based ZIP extraction with ISO-8859-1 as a fallback encoding before switching to 7-Zip (which also fails in that case).

Fixes: OX-10856
  • Loading branch information
sabieber committed Jun 20, 2024
1 parent f1d81af commit 9983716
Showing 1 changed file with 23 additions and 3 deletions.
26 changes: 23 additions & 3 deletions src/main/java/sirius/biz/util/ArchiveExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
Expand Down Expand Up @@ -215,7 +217,11 @@ public void extract(String filename,
Processor<ExtractedFile, Boolean> extractedFileConsumer) {
try {
if (isZipFile(Files.getFileExtension(filename)) || !isSevenZipEnabled()) {
extractZip(archiveFile, enhanceFileFilter(filter), extractedFileConsumer);
extractZip(archiveFile,
enhanceFileFilter(filter),
extractedFileConsumer,
StandardCharsets.UTF_8,
StandardCharsets.ISO_8859_1);
} else {
extract7z(archiveFile, enhanceFileFilter(filter), extractedFileConsumer);
}
Expand Down Expand Up @@ -243,10 +249,24 @@ private boolean ignoreHiddenFiles(String path) {

private void extractZip(File archiveFile,
Predicate<String> filter,
Processor<ExtractedFile, Boolean> extractedFileConsumer) throws Exception {
try (ZipFile zipFile = new ZipFile(archiveFile)) {
Processor<ExtractedFile, Boolean> extractedFileConsumer,
Charset charset,
Charset fallbackCharset) throws Exception {
try (ZipFile zipFile = new ZipFile(archiveFile, charset)) {
extractZipEntriesFromZipFile(filter, extractedFileConsumer, zipFile);
} catch (ZipException zipException) {
if (fallbackCharset != null) {
// Retry extraction using the fallback charset
TaskContext.get()
.log("Cannot unzip the given archive: "
+ zipException.getMessage()
+ ".\nFalling back to charset: "
+ fallbackCharset.displayName());
Exceptions.ignore(zipException);
extractZip(archiveFile, filter, extractedFileConsumer, fallbackCharset, null);
return;
}

if (!isSevenZipEnabled()) {
// This is most probably an error indicating an inconsistent ZIP archive. We therefore directly throw
// a handled exception to avoid jamming the syslog...
Expand Down

0 comments on commit 9983716

Please sign in to comment.