Skip to content

Commit

Permalink
Improve performance of PathUtils.fileContentEquals(Path, Path)
Browse files Browse the repository at this point in the history
- Add org.apache.commons.io.channels.FileChannels.
- Add RandomAccessFiles#contentEquals(RandomAccessFile,
RandomAccessFile).
- Add RandomAccessFiles#reset(RandomAccessFile).
- Add PathUtilsContentEqualsBenchmark.
  • Loading branch information
garydgregory committed Oct 8, 2023
1 parent 67bc02c commit dd93554
Show file tree
Hide file tree
Showing 9 changed files with 400 additions and 21 deletions.
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.14.1-SNAPSHOT</version>
<version>2.15.0-SNAPSHOT</version>
<name>Apache Commons IO</name>

<inceptionYear>2002</inceptionYear>
Expand Down Expand Up @@ -52,7 +52,7 @@ file comparators, endian transformation classes, and much more.
<connection>scm:git:https://gitbox.apache.org/repos/asf/commons-io.git</connection>
<developerConnection>scm:git:https://gitbox.apache.org/repos/asf/commons-io.git</developerConnection>
<url>https://gitbox.apache.org/repos/asf?p=commons-io.git</url>
<tag>rel/commons-io-2.13.0</tag>
<tag>rel/commons-io-2.15.0</tag>
</scm>

<developers>
Expand Down Expand Up @@ -294,8 +294,8 @@ file comparators, endian transformation classes, and much more.
<commons.componentid>io</commons.componentid>
<commons.module.name>org.apache.commons.io</commons.module.name>
<commons.rc.version>RC1</commons.rc.version>
<commons.bc.version>2.13.0</commons.bc.version>
<commons.release.version>2.14.0</commons.release.version>
<commons.bc.version>2.14.0</commons.bc.version>
<commons.release.version>2.15.0</commons.release.version>
<commons.release.desc>(requires Java 8)</commons.release.desc>
<commons.jira.id>IO</commons.jira.id>
<commons.jira.pid>12310477</commons.jira.pid>
Expand Down
23 changes: 22 additions & 1 deletion src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ The <action> type attribute can be add,update,fix,remove.
</properties>

<body>
<release version="2.14.1" date="202Y-MM-DD" description="Java 8 is required.">
<release version="2.15.0" date="202Y-MM-DD" description="Java 8 is required.">
<!-- FIX -->
<action dev="sebb" type="fix" issue="IO-810" due-to="Laurence Gonsalves">
XmlStreamReader encoding match RE is too strict.
Expand All @@ -73,7 +73,28 @@ The <action> type attribute can be add,update,fix,remove.
<action dev="ggregory" type="fix" issue="IO-814" due-to="Elliotte Rusty Harold, Gary Gregory">
Don't throw UncheckedIOException #491.
</action>
<action dev="ggregory" type="fix" issue="IO-814" due-to="Gary Gregory">
RandomAccessFileMode.create(Path) provides a better NullPointerException message.
</action>
<action dev="ggregory" type="fix" due-to="Gary Gregory">
Improve performance of PathUtils.fileContentEquals(Path, Path, LinkOption[], OpenOption[]) by about 60%, see PathUtilsContentEqualsBenchmark.
</action>
<action dev="ggregory" type="fix" due-to="Gary Gregory">
Improve performance of PathUtils.fileContentEquals(Path, Path) by about 60%, see PathUtilsContentEqualsBenchmark.
</action>
<!-- ADD -->
<action dev="ggregory" type="add" due-to="Gary Gregory">
Add org.apache.commons.io.channels.FileChannels.
</action>
<action dev="ggregory" type="add" due-to="Gary Gregory">
Add RandomAccessFiles#contentEquals(RandomAccessFile, RandomAccessFile).
</action>
<action dev="ggregory" type="add" due-to="Gary Gregory">
Add RandomAccessFiles#reset(RandomAccessFile).
</action>
<action dev="ggregory" type="add" due-to="Gary Gregory">
Add PathUtilsContentEqualsBenchmark.
</action>
<!-- UPDATE -->
</release>
<release version="2.14.0" date="2023-09-24" description="Java 8 is required.">
Expand Down
55 changes: 54 additions & 1 deletion src/main/java/org/apache/commons/io/RandomAccessFiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,54 @@

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.Objects;

import org.apache.commons.io.channels.FileChannels;

/**
* Works on RandomAccessFile.
* Works with {@link RandomAccessFile}.
*
* @since 2.13.0
*/
public class RandomAccessFiles {

/**
 * Tests whether two {@link RandomAccessFile}s hold the same content.
 * <p>
 * The check first compares the two references and the two file lengths; only when the lengths match and are
 * non-zero is the byte-level comparison delegated to
 * {@link FileChannels#contentEquals(FileChannel, FileChannel, int)} using {@link IOUtils#DEFAULT_BUFFER_SIZE}.
 * </p>
 *
 * @param raf1 A RandomAccessFile, may be null.
 * @param raf2 Another RandomAccessFile, may be null.
 * @return true if the contents of both RandomAccessFiles are equal, false otherwise.
 * @throws IOException if an I/O error occurs.
 * @since 2.15.0
 */
@SuppressWarnings("resource") // The channels are deliberately left open, see below.
public static boolean contentEquals(final RandomAccessFile raf1, final RandomAccessFile raf2) throws IOException {
    // Same instance, or both null: nothing to read.
    if (Objects.equals(raf1, raf2)) {
        return true;
    }
    // A null file is treated as having length 0 by length(RandomAccessFile).
    final long size1 = length(raf1);
    final long size2 = length(raf2);
    if (size1 != size2) {
        // Different lengths can never mean equal content.
        return false;
    }
    if (size1 == 0) {
        // Both lengths are 0 here, there is nothing to compare.
        return true;
    }
    // Closing a FileChannel also closes the RandomAccessFile that owns it, so the channels are not closed here;
    // the caller remains responsible for the RandomAccessFile objects it passed in.
    final FileChannel left = raf1.getChannel();
    final FileChannel right = raf2.getChannel();
    return FileChannels.contentEquals(left, right, IOUtils.DEFAULT_BUFFER_SIZE);
}

// Null-tolerant length lookup: a null file reports a length of 0.
private static long length(final RandomAccessFile raf) throws IOException {
    if (raf == null) {
        return 0;
    }
    return raf.length();
}

/**
* Reads a byte array starting at "position" for "length" bytes.
*
Expand All @@ -42,4 +82,17 @@ public static byte[] read(final RandomAccessFile input, final long position, fin
return IOUtils.toByteArray(input::read, length);
}

/**
 * Moves the file pointer of the given file back to the start (offset 0).
 *
 * @param raf The RandomAccessFile to reset.
 * @return The given RandomAccessFile, to allow call chaining.
 * @throws IOException If an I/O error occurs while seeking.
 * @throws NullPointerException If {@code raf} is null.
 * @since 2.15.0
 */
public static RandomAccessFile reset(final RandomAccessFile raf) throws IOException {
    final long startOfFile = 0;
    raf.seek(startOfFile);
    return raf;
}

}
87 changes: 87 additions & 0 deletions src/main/java/org/apache/commons/io/channels/FileChannels.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.io.channels;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Objects;

import org.apache.commons.io.IOUtils;

/**
 * Works with {@link FileChannel}.
 *
 * @since 2.15.0
 */
public final class FileChannels {

    /**
     * Don't instantiate.
     */
    private FileChannels() {
        // no-op
    }

    /**
     * Tests if two FileChannel contents are equal.
     * <p>
     * Two equal references (including two {@code null}s) are equal. Otherwise the channel sizes are compared first,
     * where a {@code null} channel counts as size 0; channels of different sizes are never equal and two empty channels
     * are always equal. Only then are bytes read, chunk by chunk, starting at each channel's current position.
     * </p>
     *
     * @param channel1 A FileChannel, may be null.
     * @param channel2 Another FileChannel, may be null.
     * @param byteBufferSize The two internal buffer capacities, in bytes, for example {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     * @return true if the contents of both FileChannels are equal, false otherwise.
     * @throws IOException if an I/O error occurs.
     * @throws IllegalArgumentException if bytes must be read and {@code byteBufferSize} is not positive.
     */
    public static boolean contentEquals(final FileChannel channel1, final FileChannel channel2, final int byteBufferSize) throws IOException {
        // Short-circuit test
        if (Objects.equals(channel1, channel2)) {
            return true;
        }
        // Short-circuit test
        final long size1 = size(channel1);
        final long size2 = size(channel2);
        if (size1 != size2) {
            return false;
        }
        if (size1 == 0) {
            // Both sizes are 0 here.
            return true;
        }
        // A zero-capacity buffer can never make progress, fail fast instead of looping forever.
        if (byteBufferSize <= 0) {
            throw new IllegalArgumentException("byteBufferSize must be positive: " + byteBufferSize);
        }
        // Dig in and do the work
        final ByteBuffer byteBuffer1 = ByteBuffer.allocateDirect(byteBufferSize);
        final ByteBuffer byteBuffer2 = ByteBuffer.allocateDirect(byteBufferSize);
        while (true) {
            // Fill both buffers completely (or up to end-of-file) so that a short read on one channel is not
            // mistaken for a content difference.
            final int filled1 = fill(channel1, byteBuffer1);
            final int filled2 = fill(channel2, byteBuffer2);
            if (filled1 != filled2) {
                return false;
            }
            if (filled1 == 0) {
                // Both channels are exhausted and every chunk so far matched.
                return true;
            }
            // ByteBuffer.equals() compares the bytes between position and limit; flip() makes that range the
            // bytes just read instead of the unused tail of the buffer.
            byteBuffer1.flip();
            byteBuffer2.flip();
            if (!byteBuffer1.equals(byteBuffer2)) {
                return false;
            }
            byteBuffer1.clear();
            byteBuffer2.clear();
        }
    }

    /**
     * Reads from the channel until the buffer is full or the end of the channel is reached.
     *
     * @param channel The channel to read from.
     * @param byteBuffer The buffer to fill, expected to be cleared by the caller.
     * @return The number of bytes now in the buffer, 0 if the channel was already at its end.
     * @throws IOException if an I/O error occurs.
     */
    private static int fill(final FileChannel channel, final ByteBuffer byteBuffer) throws IOException {
        while (byteBuffer.hasRemaining()) {
            // A negative count is the end-of-file marker (-1).
            if (channel.read(byteBuffer) < 0) {
                break;
            }
        }
        return byteBuffer.position();
    }

    private static long size(final FileChannel channel) throws IOException {
        return channel != null ? channel.size() : 0;
    }
}
23 changes: 23 additions & 0 deletions src/main/java/org/apache/commons/io/channels/package-info.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* Provides classes to work with {@link java.nio.channels}.
*
* @since 2.15.0
*/
package org.apache.commons.io.channels;
16 changes: 9 additions & 7 deletions src/main/java/org/apache/commons/io/file/PathUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.math.BigInteger;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -69,7 +70,8 @@
import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.RandomAccessFileMode;
import org.apache.commons.io.RandomAccessFiles;
import org.apache.commons.io.ThreadUtils;
import org.apache.commons.io.file.Counters.PathCounters;
import org.apache.commons.io.file.attribute.FileTimes;
Expand Down Expand Up @@ -717,20 +719,20 @@ public static boolean fileContentEquals(final Path path1, final Path path2) thro
/**
* Compares the file contents of two Paths to determine if they are equal or not.
* <p>
* File content is accessed through {@link Files#newInputStream(Path,OpenOption...)}.
* File content is accessed through {@link RandomAccessFileMode#create(Path)}.
* </p>
*
* @param path1 the first stream.
* @param path2 the second stream.
* @param linkOptions options specifying how files are followed.
* @param openOptions options specifying how files are opened.
* @param openOptions ignored.
* @return true if the content of the streams are equal or they both don't exist, false otherwise.
* @throws NullPointerException if openOptions is null.
* @throws IOException if an I/O error occurs.
* @see org.apache.commons.io.FileUtils#contentEquals(java.io.File, java.io.File)
*/
public static boolean fileContentEquals(final Path path1, final Path path2, final LinkOption[] linkOptions, final OpenOption[] openOptions)
throws IOException {
throws IOException {
if (path1 == null && path2 == null) {
return true;
}
Expand Down Expand Up @@ -764,9 +766,9 @@ public static boolean fileContentEquals(final Path path1, final Path path2, fina
// same file
return true;
}
try (InputStream inputStream1 = Files.newInputStream(nPath1, openOptions);
InputStream inputStream2 = Files.newInputStream(nPath2, openOptions)) {
return IOUtils.contentEquals(inputStream1, inputStream2);
try (RandomAccessFile raf1 = RandomAccessFileMode.READ_ONLY.create(path1.toRealPath(linkOptions));
RandomAccessFile raf2 = RandomAccessFileMode.READ_ONLY.create(path2.toRealPath(linkOptions))) {
return RandomAccessFiles.contentEquals(raf1, raf2);
}
}

Expand Down
Loading

0 comments on commit dd93554

Please sign in to comment.