Skip to content

Commit

Permalink
Merge pull request #2013 from scireum/feature/sbi/OX-10856
Browse files Browse the repository at this point in the history
ZIP Handling - Fall back to ISO-8859-1 when UTF-8 extraction fails
  • Loading branch information
sabieber authored Jun 21, 2024
2 parents 6276190 + 9983716 commit 0068998
Showing 1 changed file with 23 additions and 3 deletions.
26 changes: 23 additions & 3 deletions src/main/java/sirius/biz/util/ArchiveExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
Expand Down Expand Up @@ -215,7 +217,11 @@ public void extract(String filename,
Processor<ExtractedFile, Boolean> extractedFileConsumer) {
try {
if (isZipFile(Files.getFileExtension(filename)) || !isSevenZipEnabled()) {
extractZip(archiveFile, enhanceFileFilter(filter), extractedFileConsumer);
extractZip(archiveFile,
enhanceFileFilter(filter),
extractedFileConsumer,
StandardCharsets.UTF_8,
StandardCharsets.ISO_8859_1);
} else {
extract7z(archiveFile, enhanceFileFilter(filter), extractedFileConsumer);
}
Expand Down Expand Up @@ -243,10 +249,24 @@ private boolean ignoreHiddenFiles(String path) {

private void extractZip(File archiveFile,
Predicate<String> filter,
Processor<ExtractedFile, Boolean> extractedFileConsumer) throws Exception {
try (ZipFile zipFile = new ZipFile(archiveFile)) {
Processor<ExtractedFile, Boolean> extractedFileConsumer,
Charset charset,
Charset fallbackCharset) throws Exception {
try (ZipFile zipFile = new ZipFile(archiveFile, charset)) {
extractZipEntriesFromZipFile(filter, extractedFileConsumer, zipFile);
} catch (ZipException zipException) {
if (fallbackCharset != null) {
// Retry extraction using the fallback charset
TaskContext.get()
.log("Cannot unzip the given archive: "
+ zipException.getMessage()
+ ".\nFalling back to charset: "
+ fallbackCharset.displayName());
Exceptions.ignore(zipException);
extractZip(archiveFile, filter, extractedFileConsumer, fallbackCharset, null);
return;
}

if (!isSevenZipEnabled()) {
// This is most probably an error indicating an inconsistent ZIP archive. We therefore directly throw
// a handled exception to avoid jamming the syslog...
Expand Down

0 comments on commit 0068998

Please sign in to comment.