From 9540a1f6e114d1fc16b81d153ccdfd49bb0f3127 Mon Sep 17 00:00:00 2001 From: XenoAmess Date: Tue, 2 Jun 2020 00:27:07 +0800 Subject: [PATCH] performance refine for IOUtils.contentEquals(Reader, Reader) --- pom.xml | 52 ++ .../java/org/apache/commons/io/IOUtils.java | 222 +++++- .../buffer/LineEndUnifiedBufferedReader.java | 238 +++++++ .../buffer/UnsyncBufferedInputStream.java | 207 ++++++ .../io/input/buffer/UnsyncBufferedReader.java | 207 ++++++ .../LineEndUnifiedBufferedReaderTest.java | 187 +++++ .../buffer/UnsyncBufferedInputStreamTest.java | 143 ++++ .../buffer/UnsyncBufferedReaderTest.java | 144 ++++ .../IOUtilsContentEqualsPerformanceTest.java | 648 ++++++++++++++++++ 9 files changed, 2017 insertions(+), 31 deletions(-) create mode 100644 src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java create mode 100644 src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStream.java create mode 100644 src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedReader.java create mode 100644 src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java create mode 100644 src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStreamTest.java create mode 100644 src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedReaderTest.java create mode 100644 src/test/java/org/apache/commons/io/performance/IOUtilsContentEqualsPerformanceTest.java diff --git a/pom.xml b/pom.xml index 0746235ea5a..9c36a564d96 100644 --- a/pom.xml +++ b/pom.xml @@ -242,6 +242,18 @@ file comparators, endian transformation classes, and much more. + + org.openjdk.jmh + jmh-core + ${jmh.version} + test + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + test + org.junit.jupiter junit-jupiter @@ -312,6 +324,7 @@ file comparators, endian transformation classes, and much more. 
true Gary Gregory 86fdc7e2a11262cb + 1.21 @@ -549,5 +562,44 @@ file comparators, endian transformation classes, and much more. true + + benchmark + + true + org.apache + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + benchmark + test + + exec + + + test + java + + -classpath + + org.openjdk.jmh.Main + -rf + json + -rff + target/jmh-result.${benchmark}.json + ${benchmark} + + + + + + + + diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 34b0baf5b2f..8af30e74a4b 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -50,6 +50,7 @@ import java.util.function.Consumer; import org.apache.commons.io.function.IOConsumer; +import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader; import org.apache.commons.io.output.AppendableWriter; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.NullOutputStream; @@ -157,14 +158,14 @@ public class IOUtils { /** * The Unix line separator string. - * + * * @see StandardLineSeparator#LF */ public static final String LINE_SEPARATOR_UNIX = StandardLineSeparator.LF.getString(); /** * The Windows line separator string. - * + * * @see StandardLineSeparator#CRLF */ public static final String LINE_SEPARATOR_WINDOWS = StandardLineSeparator.CRLF.getString(); @@ -745,23 +746,48 @@ public static long consume(final InputStream input) @SuppressWarnings("resource") public static boolean contentEquals(final InputStream input1, final InputStream input2) throws IOException { + // see comments in public static boolean contentEquals(final Reader input1, final Reader input2) + // this function is mirror to it. 
if (input1 == input2) { return true; } if (input1 == null ^ input2 == null) { return false; } - final BufferedInputStream bufferedInput1 = buffer(input1); - final BufferedInputStream bufferedInput2 = buffer(input2); - int ch = bufferedInput1.read(); - while (EOF != ch) { - final int ch2 = bufferedInput2.read(); - if (ch != ch2) { - return false; + + byte[] byteArray1 = new byte[DEFAULT_BUFFER_SIZE]; + byte[] byteArray2 = new byte[DEFAULT_BUFFER_SIZE]; + int nowPos1; + int nowPos2; + int nowRead1; + int nowRead2; + while (true) { + nowPos1 = 0; + nowPos2 = 0; + for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) { + if (nowPos1 == nowCheck) { + do { + nowRead1 = input1.read(byteArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1); + } while (nowRead1 == 0); + if (nowRead1 == EOF) { + return nowPos2 == nowCheck && input2.read() == EOF; + } + nowPos1 += nowRead1; + } + if (nowPos2 == nowCheck) { + do { + nowRead2 = input2.read(byteArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2); + } while (nowRead2 == 0); + if (nowRead2 == EOF) { + return nowPos1 == nowCheck && input1.read() == EOF; + } + nowPos2 += nowRead2; + } + if (byteArray1[nowCheck] != byteArray2[nowCheck]) { + return false; + } } - ch = bufferedInput1.read(); } - return bufferedInput2.read() == EOF; } /** @@ -789,19 +815,79 @@ public static boolean contentEquals(final Reader reader1, final Reader reader2) if (reader1 == null ^ reader2 == null) { return false; } - final BufferedReader bufferedInput1 = toBufferedReader(reader1); - final BufferedReader bufferedInput2 = toBufferedReader(reader2); - int ch = bufferedInput1.read(); - while (EOF != ch) { - final int ch2 = bufferedInput2.read(); - if (ch != ch2) { - return false; + // char buffer array for input1 + char[] charArray1 = new char[DEFAULT_BUFFER_SIZE]; + // char buffer array for input2 + char[] charArray2 = new char[DEFAULT_BUFFER_SIZE]; + + // the current last-index of chars read to charArray1 from input1 + int nowPos1; + // the current 
last-index of chars read to charArray2 from input2 + int nowPos2; + // the chars read this time. + int nowRead; + while (true) { + nowPos1 = 0; + nowPos2 = 0; + /* + * For better performance, this loop is special designed. + * Since input1 and input2's content must be equal to return true, + * we share the index used in the two char buffers, + * by simply make it from 0 to DEFAULT_BUFFER_SIZE, means 8192. + * Every time it read, it read as long as possible, both limited by the input reader itself, + * and the remaining length of this array. + * The performance of the following loop can be proved simply. + * 1. If the reader can read only several chars during one read() call: + * then we only invert it every 8192 times, thus it will not be time costing. + * 2. If the reader can read many chars during one read() call: + * then it will be filled fast, and also will not be time costing. + */ + for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) { + if (nowPos1 == nowCheck) { + // if nowPos1 == nowCheck, + // then means charArray1[nowCheck] + // is empty now, thus we need to invoke read on input1 first. + do { + // read as many chars as possible, using the remaining spaces of charArray1. + nowRead = reader1.read(charArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1); + } while (nowRead == 0); + if (nowRead == EOF) { + // if input1 ends, then we check if input2 ends too. + // if nowPos2 == nowCheck && input2.read() == EOF, + // we think input2 have no more chars, + // and cannot read more either, + // thus return true. + // otherwise return false. + return nowPos2 == nowCheck && reader2.read() == EOF; + } + nowPos1 += nowRead; + } + if (nowPos2 == nowCheck) { + // if nowPos1 == nowCheck, + // then means charArray1[nowCheck] + // is empty now, thus we need to invoke read on input1 first. + do { + // read as many chars as possible, using the remaining spaces of charArray2. 
+ nowRead = reader2.read(charArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2); + } while (nowRead == 0); + if (nowRead == EOF) { + // if input2 ends, then we check if input1 ends too. + // if nowPos1 == nowCheck && input1.read() == EOF, + // we think input1 have no more chars, + // and cannot read more either, + // thus return true. + // otherwise return false. + return nowPos1 == nowCheck && reader1.read() == EOF; + } + nowPos2 += nowRead; + } + // now we have + if (charArray1[nowCheck] != charArray2[nowCheck]) { + return false; + } } - ch = bufferedInput1.read(); } - - return bufferedInput2.read() == EOF; } /** @@ -827,16 +913,90 @@ public static boolean contentEqualsIgnoreEOL(final Reader reader1, final Reader if (reader1 == null ^ reader2 == null) { return false; } - final BufferedReader br1 = toBufferedReader(reader1); - final BufferedReader br2 = toBufferedReader(reader2); - String line1 = br1.readLine(); - String line2 = br2.readLine(); - while (line1 != null && line1.equals(line2)) { - line1 = br1.readLine(); - line2 = br2.readLine(); + final LineEndUnifiedBufferedReader bufferedInput1; + if (reader1 instanceof LineEndUnifiedBufferedReader) { + bufferedInput1 = (LineEndUnifiedBufferedReader) reader1; + } else { + bufferedInput1 = new LineEndUnifiedBufferedReader(reader1); + } + + final LineEndUnifiedBufferedReader bufferedInput2; + if (reader2 instanceof LineEndUnifiedBufferedReader) { + bufferedInput2 = (LineEndUnifiedBufferedReader) reader2; + } else { + bufferedInput2 = new LineEndUnifiedBufferedReader(reader2); + } + + /* + * We use this variable to mark if last char be '\n'. + * Because "a" and "a\n" is thought contentEqualsIgnoreEOL, + * but "\n" and "\n\n" is thought not contentEqualsIgnoreEOL. 
+ */ + boolean justNewLine = true; + + int currentChar1; + int currentChar2; + + while (true) { + currentChar1 = bufferedInput1.peek(); + currentChar2 = bufferedInput2.peek(); + + if (currentChar1 == EOF) { + if (currentChar2 == EOF) { + return true; + } else { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF( bufferedInput2, currentChar2); + } + return false; + } + } else if (currentChar2 == EOF) { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1); + } + return false; + } + if (currentChar1 != currentChar2) { + return false; + } + justNewLine = currentChar1 == '\n'; + bufferedInput1.eat(); + bufferedInput2.eat(); + } + } + + /** + * private function used only in contentEqualsIgnoreEOL. + * used in contentEqualsIgnoreEOL to detect whether a input only have CRLF or EOF. + * @param input input reader + * @param currentChar current peek char of input + * @return true/false + * @throws IOException by input.read(), not me. + * @see #contentEqualsIgnoreEOL(Reader, Reader) + */ + private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input, int currentChar) throws IOException { + + /* + * logically there should be some code like + * + * if (char1 == EOF) { + * return true; + * } + * + * here. + * + * But actually, if this input's read() is EOF, then we will not invoke this function at all. + * So the check is deleted. + * + * You can go contentEqualsIgnoreEOL for details. + */ + + if (currentChar == '\n') { + input.eat(); + return input.read() == EOF; } - return Objects.equals(line1, line2); + return false; } /** @@ -1154,7 +1314,7 @@ public static long copyLarge(final InputStream inputStream, final OutputStream o *

* * @param inputStream the InputStream to read, may be {@code null}. - * @param outputStream the OutputStream to write + * @param outputStream the OutputStream to write * @param buffer the buffer to use for the copy * @return the number of bytes copied. or {@code 0} if {@code input} is {@code null}. * @throws NullPointerException if the OutputStream is {@code null}. @@ -3382,7 +3542,7 @@ public static Writer writer(final Appendable appendable) { * Instances should NOT be constructed in standard programming. */ public IOUtils() { //NOSONAR - + } } diff --git a/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java b/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java new file mode 100644 index 00000000000..42a1bcd147a --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.io.input.buffer; + +import java.io.IOException; +import java.io.Reader; +import org.apache.commons.io.IOUtils; +import static org.apache.commons.io.IOUtils.EOF; + +/** + * A NonThreadSafeButFastBufferedReader who use some filters to make line ends unified. + * + *
    + *
  • "\r\n" in original reader will become "\n", + *
  • "\n" in original reader will become "\n", + *
  • "\r" followed by any character other than "\n" in original reader will become "\n", + *
  • if the original reader ends with "\r" then it will become "\n". + *
+ */ +public class LineEndUnifiedBufferedReader extends UnsyncBufferedReader { + + private boolean cachedCR; + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer size. + * + * @param reader The original reader, which is being buffered. + * @param charBufferSize size of the buffer. + */ + public LineEndUnifiedBufferedReader(Reader reader, int charBufferSize) { + super(reader, charBufferSize); + } + + /** + * Creates a new instance, which filters the given reader, and + * uses IOUtils.DEFAULT_BUFFER_SIZE. + * + * @param reader The original reader, which is being buffered. + * @see IOUtils#DEFAULT_BUFFER_SIZE + */ + public LineEndUnifiedBufferedReader(Reader reader) { + super(reader); + } + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer. + * + * @param reader The original reader, which is being buffered. + * @param charBuffer buffer used. + */ + public LineEndUnifiedBufferedReader(Reader reader, char[] charBuffer) { + super(reader, charBuffer); + } + + /** + * {@inheritDoc} + */ + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + if (len <= 0) { + return 0; + } + final char[] charBufferLocal = this.getCharBuffer(); + final Reader readerLocal = this.getReader(); + int nowLimitLocal = this.getNowLimit(); + int nowIndexLocal = this.getNowIndex(); + int currentBufferSize = nowLimitLocal - nowIndexLocal; + + if (currentBufferSize == 0) { + nowLimitLocal = nowIndexLocal = 0; + if (this.cachedCR) { + charBufferLocal[nowLimitLocal++] = '\r'; + } + + int readLength; + do { + readLength = readerLocal.read(charBufferLocal, 0, charBufferLocal.length - nowLimitLocal); + } while (readLength == 0); + if (readLength == EOF) { + if (this.cachedCR) { + this.cachedCR = false; + cbuf[off] = charBufferLocal[nowIndexLocal++] = '\n'; + this.setNowIndex(nowIndexLocal); + this.setNowLimit(nowLimitLocal); + return 1; + } + this.setNowIndex(nowIndexLocal); + 
this.setNowLimit(nowLimitLocal); + return EOF; + } + nowLimitLocal += readLength; + + if (charBufferLocal[nowLimitLocal - 1] == '\r') { + --nowLimitLocal; + this.cachedCR = true; + } else { + this.cachedCR = false; + } + this.setNowLimit(nowLimitLocal); + if (nowLimitLocal == 0) { + this.setNowIndex(nowIndexLocal); + return 0; + } + this.filter(); + nowLimitLocal = this.getNowLimit(); + nowIndexLocal = this.getNowIndex(); + currentBufferSize = nowLimitLocal - nowIndexLocal; + } + if (currentBufferSize <= len) { + System.arraycopy(charBufferLocal, nowIndexLocal, cbuf, off, currentBufferSize); + nowLimitLocal = nowIndexLocal = 0; + this.setNowIndex(nowIndexLocal); + this.setNowLimit(nowLimitLocal); + return currentBufferSize; + } else { + System.arraycopy(charBufferLocal, nowIndexLocal, cbuf, off, len); + nowIndexLocal += len; + this.setNowIndex(nowIndexLocal); + this.setNowLimit(nowLimitLocal); + return len; + } + } + + /** + * {@inheritDoc} + */ + @Override + public int peek() throws IOException { + final char[] charBufferLocal = this.getCharBuffer(); + final Reader readerLocal = this.getReader(); + int nowLimitLocal = this.getNowLimit(); + int nowIndexLocal = this.getNowIndex(); + + int currentBufferSize = nowLimitLocal - nowIndexLocal; + if (currentBufferSize == 0) { + nowLimitLocal = nowIndexLocal = 0; + if (this.cachedCR) { + charBufferLocal[nowLimitLocal++] = '\r'; + } + + int readLength; + do { + readLength = readerLocal.read(charBufferLocal, 0, charBufferLocal.length - nowLimitLocal); + } while (readLength == 0); + if (readLength == EOF) { + if (this.cachedCR) { + this.cachedCR = false; + this.setNowIndex(nowIndexLocal); + this.setNowLimit(nowLimitLocal); + return charBufferLocal[nowIndexLocal] = '\n'; + } + this.setNowIndex(nowIndexLocal); + this.setNowLimit(nowLimitLocal); + return EOF; + } + nowLimitLocal += readLength; + + if (charBufferLocal[nowLimitLocal - 1] == '\r') { + --nowLimitLocal; + this.cachedCR = true; + } else { + this.cachedCR = false; + } 
+ this.setNowLimit(nowLimitLocal); + if (nowLimitLocal == 0) { + this.setNowIndex(nowIndexLocal); + return this.peek(); + } + this.filter(); + nowLimitLocal = this.getNowLimit(); + nowIndexLocal = this.getNowIndex(); + } + return charBufferLocal[nowIndexLocal]; + } + + /** + * Make sure chars in the charBuffer have no '\r'. + * "\r\n" in original reader will become "\n", + * "\n" in original reader will become "\n", + * "\r" with normal character behind in original reader will become "\n". + * Other chars should not change. + * After the filter, change this.nowIndex accordingly. + */ + private void filter() { + final char[] charBufferLocal = this.getCharBuffer(); + + int i = this.getNowLimit() - 1; + int j = i; + if (i >= 0) { + for (; i >= 0; --i, --j) { + if (charBufferLocal[i] == '\n') { + charBufferLocal[j] = '\n'; + final int i_1 = i - 1; + if (i_1 >= 0 && charBufferLocal[i_1] == '\r') { + --i; + } + } else if (charBufferLocal[i] == '\r') { + charBufferLocal[j] = '\n'; + } else { + charBufferLocal[j] = charBufferLocal[i]; + } + } + this.setNowIndex(j + 1); + } + } + + /** + * getter for this.cacheCR + * @return this.cacheCR + */ + public boolean isCachedCR() { + return this.cachedCR; + } + + /** + * setter for this.cacheCR + * @param cachedCR this.cacheCR + */ + public void setCachedCR(boolean cachedCR) { + this.cachedCR = cachedCR; + } +} diff --git a/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStream.java b/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStream.java new file mode 100644 index 00000000000..b0191c2c50d --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStream.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.io.IOUtils; +import static org.apache.commons.io.IOUtils.EOF; + +/** + * A BufferedReader class who does not care about thread safety, but very much faster. + * + * Should be able to replace java.io.BufferedReader in nearly every use-cases when you + * need the Reader be buffered, but do not need it have thread safety. + */ +public class UnsyncBufferedInputStream extends InputStream { + private final InputStream inputStream; + private final byte[] byteBuffer; + + private int nowIndex = 0; + private int nowLimit = 0; + + /** + * Creates a new instance, which filters the given input stream, and + * uses the given buffer size. + * + * @param inputStream The original input stream, which is being buffered. + * @param charBufferSize size of the buffer. + */ + public UnsyncBufferedInputStream(InputStream inputStream, int charBufferSize) { + this(inputStream, new byte[charBufferSize]); + } + + /** + * Creates a new instance, which filters the given input stream, and + * uses IOUtils.DEFAULT_BUFFER_SIZE. + * + * @param inputStream The original input stream, which is being buffered. 
+ * @see IOUtils#DEFAULT_BUFFER_SIZE + */ + public UnsyncBufferedInputStream(InputStream inputStream) { + this(inputStream, IOUtils.DEFAULT_BUFFER_SIZE); + } + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer. + * + * @param inputStream The original inputStream, which is being buffered. + * @param byteBuffer buffer used. + */ + public UnsyncBufferedInputStream(InputStream inputStream, byte[] byteBuffer) { + this.inputStream = inputStream; + this.byteBuffer = byteBuffer; + } + + /** + * {@inheritDoc} + */ + @Override + public int read(byte[] cbuf, int off, int len) throws IOException { + if (len <= 0) { + return 0; + } + int currentBufferSize = this.nowLimit - this.nowIndex; + if (currentBufferSize == 0) { + int readLength; + do { + readLength = this.inputStream.read(this.byteBuffer, 0, this.byteBuffer.length); + } while (readLength == 0); + if (readLength == EOF) { + return EOF; + } + this.nowLimit = readLength; + this.nowIndex = 0; + currentBufferSize = this.nowLimit - this.nowIndex; + } + if (currentBufferSize <= len) { + System.arraycopy(this.byteBuffer, this.nowIndex, cbuf, off, currentBufferSize); + this.nowLimit = this.nowIndex = 0; + return currentBufferSize; + } else { + System.arraycopy(this.byteBuffer, this.nowIndex, cbuf, off, len); + this.nowIndex += len; + return len; + } + } + + /** + * {@inheritDoc} + */ + @Override + public int read() throws IOException { + int res = this.peek(); + if (res != EOF) { + eat(); + } + return res; + } + + /** + * see the next byte, but not mark it as read. 
+ * + * @return the next byte + * @throws IOException by inputStream.read() + * @see #read() + */ + public int peek() throws IOException { + int currentBufferSize = this.nowLimit - this.nowIndex; + if (currentBufferSize == 0) { + int readLength; + do { + readLength = this.inputStream.read(this.byteBuffer, 0, this.byteBuffer.length); + } while (readLength == 0); + if (readLength == EOF) { + return EOF; + } + this.nowLimit = readLength; + this.nowIndex = 0; + return this.byteBuffer[0]; + } + return this.byteBuffer[this.nowIndex]; + } + + /** + * mark the current char as read. + * must be used after invoke peek. + * + * @see #read() + * @see #peek() + */ + public void eat() { + this.nowIndex++; + } + + /** + * {@inheritDoc} + */ + @Override + public void close() throws IOException { + if (this.inputStream != null) { + this.inputStream.close(); + } + } + + /** + * getter for this.inputStream + * @return this.inputStream + */ + public InputStream getInputStream() { + return this.inputStream; + } + + /** + * getter for this.byteBuffer + * @return this.byteBuffer + */ + public byte[] getByteBuffer() { + return this.byteBuffer; + } + + /** + * getter for this.nowIndex + * @return this.nowIndex + */ + public int getNowIndex() { + return this.nowIndex; + } + + /** + * setter for this.nowIndex + * @param nowIndex this.nowIndex + */ + public void setNowIndex(int nowIndex) { + this.nowIndex = nowIndex; + } + + /** + * getter for this.nowLimit + * @return this.nowLimit + */ + public int getNowLimit() { + return this.nowLimit; + } + + /** + * setter for this.nowLimit + * @param nowLimit this.nowLimit + */ + public void setNowLimit(int nowLimit) { + this.nowLimit = nowLimit; + } +} diff --git a/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedReader.java b/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedReader.java new file mode 100644 index 00000000000..3d3eda21e84 --- /dev/null +++ 
b/src/main/java/org/apache/commons/io/input/buffer/UnsyncBufferedReader.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.IOException; +import java.io.Reader; +import org.apache.commons.io.IOUtils; +import static org.apache.commons.io.IOUtils.EOF; + +/** + * A BufferedReader class who does not care about thread safety, but very much faster. + * + * Should be able to replace java.io.BufferedReader in nearly every use-cases when you + * need the Reader be buffered, but do not need it have thread safety. + */ +public class UnsyncBufferedReader extends Reader { + private final Reader reader; + private final char[] charBuffer; + + private int nowIndex = 0; + private int nowLimit = 0; + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer size. + * + * @param reader The original reader, which is being buffered. + * @param charBufferSize size of the buffer. + */ + public UnsyncBufferedReader(Reader reader, int charBufferSize) { + this(reader, new char[charBufferSize]); + } + + /** + * Creates a new instance, which filters the given reader, and + * uses IOUtils.DEFAULT_BUFFER_SIZE. 
+ * + * @param reader The original reader, which is being buffered. + * @see IOUtils#DEFAULT_BUFFER_SIZE + */ + public UnsyncBufferedReader(Reader reader) { + this(reader, IOUtils.DEFAULT_BUFFER_SIZE); + } + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer. + * + * @param reader The original reader, which is being buffered. + * @param charBuffer buffer used. + */ + public UnsyncBufferedReader(Reader reader, char[] charBuffer) { + this.reader = reader; + this.charBuffer = charBuffer; + } + + /** + * {@inheritDoc} + */ + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + if (len <= 0) { + return 0; + } + int currentBufferSize = this.nowLimit - this.nowIndex; + if (currentBufferSize == 0) { + int readLength; + do { + readLength = this.reader.read(this.charBuffer, 0, this.charBuffer.length); + } while (readLength == 0); + if (readLength == EOF) { + return EOF; + } + this.nowLimit = readLength; + this.nowIndex = 0; + currentBufferSize = this.nowLimit - this.nowIndex; + } + if (currentBufferSize <= len) { + System.arraycopy(this.charBuffer, this.nowIndex, cbuf, off, currentBufferSize); + this.nowLimit = this.nowIndex = 0; + return currentBufferSize; + } else { + System.arraycopy(this.charBuffer, this.nowIndex, cbuf, off, len); + this.nowIndex += len; + return len; + } + } + + /** + * {@inheritDoc} + */ + @Override + public int read() throws IOException { + int res = this.peek(); + if (res != EOF) { + eat(); + } + return res; + } + + /** + * see the next char, but not mark it as read. 
+ * + * @return the next char + * @throws IOException by reader.read() + * @see #read() + */ + public int peek() throws IOException { + int currentBufferSize = this.nowLimit - this.nowIndex; + if (currentBufferSize == 0) { + int readLength; + do { + readLength = this.reader.read(this.charBuffer, 0, this.charBuffer.length); + } while (readLength == 0); + if (readLength == EOF) { + return EOF; + } + this.nowLimit = readLength; + this.nowIndex = 0; + return this.charBuffer[0]; + } + return this.charBuffer[this.nowIndex]; + } + + /** + * mark the current char as read. + * must be used after invoke peek. + * + * @see #read() + * @see #peek() + */ + public void eat() { + this.nowIndex++; + } + + /** + * {@inheritDoc} + */ + @Override + public void close() throws IOException { + if (this.reader != null) { + this.reader.close(); + } + } + + /** + * getter for this.reader + * @return this.reader + */ + public Reader getReader() { + return this.reader; + } + + /** + * getter for this.charBuffer + * @return this.charBuffer + */ + public char[] getCharBuffer() { + return this.charBuffer; + } + + /** + * getter for this.nowIndex + * @return this.nowIndex + */ + public int getNowIndex() { + return this.nowIndex; + } + + /** + * setter for this.nowIndex + * @param nowIndex this.nowIndex + */ + public void setNowIndex(int nowIndex) { + this.nowIndex = nowIndex; + } + + /** + * getter for this.nowLimit + * @return this.nowLimit + */ + public int getNowLimit() { + return this.nowLimit; + } + + /** + * setter for this.nowLimit + * @param nowLimit this.nowLimit + */ + public void setNowLimit(int nowLimit) { + this.nowLimit = nowLimit; + } +} diff --git a/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java b/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java new file mode 100644 index 00000000000..28857e3fe56 --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java 
@@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.CharArrayReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.Random; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * modified from NonThreadSafeButFastBufferedReaderTest + * @see UnsyncBufferedReaderTest + */ +public class LineEndUnifiedBufferedReaderTest { + /** + * Always using the same seed should ensure a reproducable test. 
+ */ + private final Random rnd = new Random(1530960934483L); + + @Test + public void testRandomRead() throws Exception { + final char[] inputBuffer = newInputBuffer(); + final char[] bufferCopy = new char[inputBuffer.length]; + final CharArrayReader bais = new CharArrayReader(inputBuffer); + @SuppressWarnings("resource") final LineEndUnifiedBufferedReader cbis = + new LineEndUnifiedBufferedReader(bais, 253); + int offset = 0; + final char[] readBuffer = new char[256]; + while (offset < bufferCopy.length) { + switch (rnd.nextInt(2)) { + case 0: { + final int res = cbis.read(); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } + if (inputBuffer[offset] != res) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + + ", got " + res); + } + ++offset; + break; + } + case 1: { + final int res = cbis.read(readBuffer, 0, rnd.nextInt(readBuffer.length + 1)); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } else if (res == 0) { + throw new IllegalStateException("Unexpected zero-byte-result at offset " + offset); + } else { + for (int i = 0; i < res; i++) { + if (inputBuffer[offset] != readBuffer[i]) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + ", got " + readBuffer[i]); + } + ++offset; + } + } + break; + } + default: + throw new IllegalStateException("Unexpected random choice value"); + } + } + bais.close(); + cbis.close(); + } + + @Test + public void testClose() throws Exception { + LineEndUnifiedBufferedReader b = new LineEndUnifiedBufferedReader(null); + closeSeveralTimes(b); + LineEndUnifiedBufferedReader b2 = + new LineEndUnifiedBufferedReader(new StringReader("")); + closeSeveralTimes(b2); + } + + private void closeSeveralTimes(LineEndUnifiedBufferedReader b) throws IOException { + b.close(); + b.close(); + b.close(); + b.close(); + b.close(); + } + + @Test + public 
void testFullRead() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + while (b.read() != IOUtils.EOF) { + } + } + + @Test + public void testFullReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + while (true) { + final int res = b.read(buffer, 0, buffer.length); + if (res == IOUtils.EOF) { + break; + } + } + } + + @Test + public void testWeirdReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + int res; + res = b.read(buffer, 0, 0); + assertEquals(res, 0); + res = b.read(buffer, 0, -20); + assertEquals(res, 0); + } + + /** + * Create a large, but random input buffer. + * Do not test `\r` problems in this test. + * `\r` problems are specially tested in IOUtilsTestCase.testContentEqualsIgnoreEOL + * @see org.apache.commons.io.IOUtilsTestCase#testContentEqualsIgnoreEOL() + */ + private char[] newInputBuffer() { + final char[] buffer = new char[16 * 512 + rnd.nextInt(512)]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (char) rnd.nextInt(); + while (buffer[i] == '\r') { + buffer[i] = (char) rnd.nextInt(); + } + } + return buffer; + } + + @Test + public void testCachedCR_ReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("")); + b.setCachedCR(true); + char[] chars = new char[5]; + Assertions.assertEquals(b.read(chars), 1); + assertEquals('\n', chars[0]); + } + + @Test + public void testCachedCR_Read() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("")); + b.setCachedCR(true); + Assertions.assertEquals('\n', b.read()); + } + + @Test + public void testCR_ReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new 
LineEndUnifiedBufferedReader(new StringReader("\r")); + char[] chars = new char[5]; + Assertions.assertEquals(0, b.read(chars)); + Assertions.assertTrue(b.isCachedCR()); + Assertions.assertEquals(1, b.read(chars)); + assertEquals('\n', chars[0]); + } + + @Test + public void testCR_Read() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("\r")); + Assertions.assertEquals('\n', b.read()); + } +} diff --git a/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStreamTest.java b/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStreamTest.java new file mode 100644 index 00000000000..f8dc777f4a0 --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedInputStreamTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.io.input.buffer; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringReader; +import java.util.Random; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.input.ReaderInputStream; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * modified from CircularBufferInputStreamTest + * @see CircularBufferInputStreamTest + */ +public class UnsyncBufferedInputStreamTest { + /** + * Always using the same seed should ensure a reproducable test. + */ + private final Random rnd = new Random(1530960934483L); + + @Test + public void testRandomRead() throws Exception { + final byte[] inputBuffer = newInputBuffer(); + final byte[] bufferCopy = new byte[inputBuffer.length]; + final ByteArrayInputStream bais = new ByteArrayInputStream(inputBuffer); + @SuppressWarnings("resource") final UnsyncBufferedInputStream cbis = + new UnsyncBufferedInputStream(bais, 253); + int offset = 0; + final byte[] readBuffer = new byte[256]; + while (offset < bufferCopy.length) { + switch (rnd.nextInt(2)) { + case 0: { + final int res = cbis.read(); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } + if (inputBuffer[offset] != res) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + + ", got " + res); + } + ++offset; + break; + } + case 1: { + final int res = cbis.read(readBuffer, 0, rnd.nextInt(readBuffer.length + 1)); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } else if (res == 0) { + throw new IllegalStateException("Unexpected zero-byte-result at offset " + offset); + } else { + for (int i = 0; i < res; i++) { + if (inputBuffer[offset] != readBuffer[i]) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + ", got " + readBuffer[i]); + } + ++offset; + } + } + 
break; + } + default: + throw new IllegalStateException("Unexpected random choice value"); + } + } + bais.close(); + cbis.close(); + } + + @Test + public void testClose() throws Exception { + UnsyncBufferedInputStream b = new UnsyncBufferedInputStream(null); + closeSeveralTimes(b); + UnsyncBufferedInputStream b2 = + new UnsyncBufferedInputStream(new ReaderInputStream(new StringReader(""))); + closeSeveralTimes(b2); + } + + private void closeSeveralTimes(UnsyncBufferedInputStream b) throws IOException { + b.close(); + b.close(); + b.close(); + b.close(); + b.close(); + } + + @Test + public void testFullRead() throws Exception { + UnsyncBufferedInputStream b = + new UnsyncBufferedInputStream(new ReaderInputStream(new StringReader("aaaaa"))); + while (b.read() != IOUtils.EOF) { + } + } + + @Test + public void testFullReadArray() throws Exception { + UnsyncBufferedInputStream b = + new UnsyncBufferedInputStream(new ReaderInputStream(new StringReader("aaaaa"))); + final byte[] buffer = new byte[5]; + while (true) { + final int res = b.read(buffer, 0, buffer.length); + if (res == IOUtils.EOF) { + break; + } + } + } + + @Test + public void testWeirdReadArray() throws Exception { + UnsyncBufferedInputStream b = + new UnsyncBufferedInputStream(new ReaderInputStream(new StringReader("aaaaa"))); + final byte[] buffer = new byte[5]; + int res; + res = b.read(buffer, 0, 0); + assertEquals(res, 0); + res = b.read(buffer, 0, -20); + assertEquals(res, 0); + } + + /** + * Create a large, but random input buffer. 
+ */ + private byte[] newInputBuffer() { + final byte[] buffer = new byte[16 * 512 + rnd.nextInt(512)]; + rnd.nextBytes(buffer); + return buffer; + } +} diff --git a/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedReaderTest.java b/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedReaderTest.java new file mode 100644 index 00000000000..34005c90e5c --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/buffer/UnsyncBufferedReaderTest.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.CharArrayReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.Random; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * modified from NonThreadSafeButFastBufferedInputStreamTest + * @see UnsyncBufferedInputStreamTest + */ +public class UnsyncBufferedReaderTest { + /** + * Always using the same seed should ensure a reproducable test. 
+ */ + private final Random rnd = new Random(1530960934483L); + + @Test + public void testRandomRead() throws Exception { + final char[] inputBuffer = newInputBuffer(); + final char[] bufferCopy = new char[inputBuffer.length]; + final CharArrayReader bais = new CharArrayReader(inputBuffer); + @SuppressWarnings("resource") final UnsyncBufferedReader cbis = + new UnsyncBufferedReader(bais, 253); + int offset = 0; + final char[] readBuffer = new char[256]; + while (offset < bufferCopy.length) { + switch (rnd.nextInt(2)) { + case 0: { + final int res = cbis.read(); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } + if (inputBuffer[offset] != res) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + + ", got " + res); + } + ++offset; + break; + } + case 1: { + final int res = cbis.read(readBuffer, 0, rnd.nextInt(readBuffer.length + 1)); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } else if (res == 0) { + throw new IllegalStateException("Unexpected zero-byte-result at offset " + offset); + } else { + for (int i = 0; i < res; i++) { + if (inputBuffer[offset] != readBuffer[i]) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + ", got " + readBuffer[i]); + } + ++offset; + } + } + break; + } + default: + throw new IllegalStateException("Unexpected random choice value"); + } + } + bais.close(); + cbis.close(); + } + + @Test + public void testClose() throws Exception { + UnsyncBufferedReader b = new UnsyncBufferedReader(null); + closeSeveralTimes(b); + UnsyncBufferedReader b2 = + new UnsyncBufferedReader(new StringReader("")); + closeSeveralTimes(b2); + } + + private void closeSeveralTimes(UnsyncBufferedReader b) throws IOException { + b.close(); + b.close(); + b.close(); + b.close(); + b.close(); + } + + @Test + public void testFullRead() throws Exception { + 
UnsyncBufferedReader b = + new UnsyncBufferedReader(new StringReader("aaaaa")); + while (b.read() != IOUtils.EOF) { + } + } + + @Test + public void testFullReadArray() throws Exception { + UnsyncBufferedReader b = + new UnsyncBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + while (true) { + final int res = b.read(buffer, 0, buffer.length); + if (res == IOUtils.EOF) { + break; + } + } + } + + @Test + public void testWeirdReadArray() throws Exception { + UnsyncBufferedReader b = + new UnsyncBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + int res; + res = b.read(buffer, 0, 0); + assertEquals(res, 0); + res = b.read(buffer, 0, -20); + assertEquals(res, 0); + } + + /** + * Create a large, but random input buffer. + */ + private char[] newInputBuffer() { + final char[] buffer = new char[16 * 512 + rnd.nextInt(512)]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (char) rnd.nextInt(); + } + return buffer; + } +} diff --git a/src/test/java/org/apache/commons/io/performance/IOUtilsContentEqualsPerformanceTest.java b/src/test/java/org/apache/commons/io/performance/IOUtilsContentEqualsPerformanceTest.java new file mode 100644 index 00000000000..da6a90d8976 --- /dev/null +++ b/src/test/java/org/apache/commons/io/performance/IOUtilsContentEqualsPerformanceTest.java @@ -0,0 +1,648 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.performance; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader; +import org.apache.commons.io.input.buffer.UnsyncBufferedReader; +import org.apache.commons.lang3.StringUtils; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import static org.apache.commons.io.IOUtils.EOF; +import static org.apache.commons.io.IOUtils.toBufferedReader; + +/** + * JMH benchmarks comparing the old and new Reader-based implementations of contentEquals and contentEqualsIgnoreEOL.
+ */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS) +@Fork(value = 1, jvmArgs = {"-server"}) +public class IOUtilsContentEqualsPerformanceTest { + static String[] STRINGS = new String[5]; + + static { + STRINGS[0] = StringUtils.repeat("ab", 1 << 24); + STRINGS[1] = STRINGS[0] + 'c'; + STRINGS[2] = STRINGS[0] + 'd'; + STRINGS[3] = StringUtils.repeat("ab\rab\n", 1 << 24); + STRINGS[4] = StringUtils.repeat("ab\r\nab\r", 1 << 24); + } + + static String SPECIAL_CASE_STRING_0 = StringUtils.repeat(StringUtils.repeat("ab", 1 << 24) + '\n', 2); + static String SPECIAL_CASE_STRING_1 = StringUtils.repeat(StringUtils.repeat("cd", 1 << 24) + '\n', 2); + + @Benchmark + public boolean[] testContentEqualsForFileNew() throws IOException { + boolean[] res = new boolean[2]; + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileNoBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[0] = IOUtils.contentEquals(inputReader1, inputReader2); + } + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[1] = IOUtils.contentEquals(inputReader1, inputReader2); + } + return res; + } + + @Benchmark + public boolean[] testContentEqualsForFileOld() throws IOException { + boolean[] res = new boolean[2]; + try (InputStream inputStream1 = + 
this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileNoBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[0] = contentEqualsOld(inputReader1, inputReader2); + } + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[1] = contentEqualsOld(inputReader1, inputReader2); + } + return res; + } + + @Benchmark + public void testContentEqualsForStringNew(Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (Reader inputReader1 = new StringReader(STRINGS[i]); + Reader inputReader2 = new StringReader(STRINGS[j]); + ) { + blackhole.consume(IOUtils.contentEquals(inputReader1, inputReader2)); + } + } + } + } + + @Benchmark + public void testContentEqualsForStringOld(Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (Reader inputReader1 = new StringReader(STRINGS[i]); + Reader inputReader2 = new StringReader(STRINGS[j]); + ) { + blackhole.consume(contentEqualsOld(inputReader1, inputReader2)); + } + } + } + } + + @Benchmark + public boolean[] testContentEqualsIgnoreEOLForFileNew() throws IOException { + boolean[] res = new boolean[2]; + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileNoBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new 
InputStreamReader(inputStream2); + ) { + res[0] = contentEqualsIgnoreEOLNew1(inputReader1, inputReader2); + } + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[1] = contentEqualsIgnoreEOLNew1(inputReader1, inputReader2); + } + return res; + } + + @Benchmark + public boolean[] testContentEqualsIgnoreEOLForFileNew2() throws IOException { + boolean[] res = new boolean[2]; + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileNoBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[0] = contentEqualsIgnoreEOLNew2(inputReader1, inputReader2); + } + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[1] = contentEqualsIgnoreEOLNew2(inputReader1, inputReader2); + } + return res; + } + + @Benchmark + public boolean[] testContentEqualsIgnoreEOLForFileOld() throws IOException { + boolean[] res = new boolean[2]; + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileNoBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new 
InputStreamReader(inputStream2); + ) { + res[0] = contentEqualsIgnoreEOLOld(inputReader1, inputReader2); + } + try (InputStream inputStream1 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + InputStream inputStream2 = + this.getClass().getResourceAsStream("/org/apache/commons/io/testfileBOM.xml"); + Reader inputReader1 = new InputStreamReader(inputStream1); + Reader inputReader2 = new InputStreamReader(inputStream2); + ) { + res[1] = contentEqualsIgnoreEOLOld(inputReader1, inputReader2); + } + return res; + } + + @Benchmark + public void testContentEqualsIgnoreEOLForStringNew(Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (Reader inputReader1 = new StringReader(STRINGS[i]); + Reader inputReader2 = new StringReader(STRINGS[j]); + ) { + blackhole.consume(contentEqualsIgnoreEOLNew1(inputReader1, inputReader2)); + } + } + } + } + + @Benchmark + public void testContentEqualsIgnoreEOLForStringNew2(Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (Reader inputReader1 = new StringReader(STRINGS[i]); + Reader inputReader2 = new StringReader(STRINGS[j]); + ) { + blackhole.consume(contentEqualsIgnoreEOLNew2(inputReader1, inputReader2)); + } + } + } + } + + @Benchmark + public void testContentEqualsIgnoreEOLForStringOld(Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (Reader inputReader1 = new StringReader(STRINGS[i]); + Reader inputReader2 = new StringReader(STRINGS[j]); + ) { + blackhole.consume(contentEqualsIgnoreEOLOld(inputReader1, inputReader2)); + } + } + } + } + + @Benchmark + public void testSpecialCaseForContentEqualsIgnoreEOLForStringNew(Blackhole blackhole) throws IOException { + try (Reader inputReader1 = new StringReader(SPECIAL_CASE_STRING_0); + Reader inputReader2 = new StringReader(SPECIAL_CASE_STRING_1); + ) { + 
blackhole.consume(contentEqualsIgnoreEOLNew1(inputReader1, inputReader2)); + } + } + + @Benchmark + public void testSpecialCaseForContentEqualsIgnoreEOLForStringNew2(Blackhole blackhole) throws IOException { + try (Reader inputReader1 = new StringReader(SPECIAL_CASE_STRING_0); + Reader inputReader2 = new StringReader(SPECIAL_CASE_STRING_1); + ) { + blackhole.consume(contentEqualsIgnoreEOLNew2(inputReader1, inputReader2)); + } + } + + @Benchmark + public void testSpecialCaseForContentEqualsIgnoreEOLForStringOld(Blackhole blackhole) throws IOException { + try (Reader inputReader1 = new StringReader(SPECIAL_CASE_STRING_0); + Reader inputReader2 = new StringReader(SPECIAL_CASE_STRING_1); + ) { + blackhole.consume(contentEqualsIgnoreEOLOld(inputReader1, inputReader2)); + } + } + + public static boolean contentEqualsOld(final Reader input1, final Reader input2) + throws IOException { + if (input1 == input2) { + return true; + } + if (input1 == null ^ input2 == null) { + return false; + } + final BufferedReader bufferedInput1 = toBufferedReader(input1); + final BufferedReader bufferedInput2 = toBufferedReader(input2); + + int ch = bufferedInput1.read(); + while (EOF != ch) { + final int ch2 = bufferedInput2.read(); + if (ch != ch2) { + return false; + } + ch = bufferedInput1.read(); + } + + return bufferedInput2.read() == EOF; + } + + public static boolean contentEqualsIgnoreEOLOld(final Reader input1, final Reader input2) + throws IOException { + if (input1 == input2) { + return true; + } + if (input1 == null ^ input2 == null) { + return false; + } + final BufferedReader br1 = toBufferedReader(input1); + final BufferedReader br2 = toBufferedReader(input2); + + String line1 = br1.readLine(); + String line2 = br2.readLine(); + while (line1 != null && line1.equals(line2)) { + line1 = br1.readLine(); + line2 = br2.readLine(); + } + return Objects.equals(line1, line2); + } + + //----- + + public static boolean contentEqualsIgnoreEOLNew2(final Reader input1, final Reader 
input2) + throws IOException { + if (input1 == input2) { + return true; + } + if (input1 == null ^ input2 == null) { + return false; + } + + final LineEndUnifiedBufferedReader bufferedInput1; + if (input1 instanceof LineEndUnifiedBufferedReader) { + bufferedInput1 = (LineEndUnifiedBufferedReader) input1; + } else { + bufferedInput1 = new LineEndUnifiedBufferedReader(input1); + } + + final LineEndUnifiedBufferedReader bufferedInput2; + if (input2 instanceof LineEndUnifiedBufferedReader) { + bufferedInput2 = (LineEndUnifiedBufferedReader) input2; + } else { + bufferedInput2 = new LineEndUnifiedBufferedReader(input2); + } + + /* + * We use this variable to mark if last char be '\n'. + * Because "a" and "a\n" is thought contentEqualsIgnoreEOL, + * but "\n" and "\n\n" is thought not contentEqualsIgnoreEOL. + */ + boolean justNewLine = false; + + int currentChar1; + int currentChar2; + + while (true) { + currentChar1 = bufferedInput1.peek(); + currentChar2 = bufferedInput2.peek(); + + if (currentChar1 == EOF) { + if (currentChar2 == EOF) { + return true; + } else { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF( bufferedInput2); + } + return false; + } + } else if (currentChar2 == EOF) { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF(bufferedInput1); + } + return false; + } + if (currentChar1 != currentChar2) { + return false; + } + if (currentChar1 == '\n') { + justNewLine = true; + } + bufferedInput1.eat(); + bufferedInput2.eat(); + } + } + + private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input) throws IOException { + final int char1 = input.read(); + if (char1 == EOF) { + return true; + } + if (char1 == '\n') { + return input.read() == EOF; + } + return false; + } + + public static boolean contentEqualsIgnoreEOLNew1(final Reader input1, final Reader input2) + throws IOException { + if (input1 == input2) { + return true; + } + if (input1 == null ^ input2 == null) { + return false; + } + + final UnsyncBufferedReader 
bufferedInput1; + if (input1 instanceof UnsyncBufferedReader) { + bufferedInput1 = (UnsyncBufferedReader) input1; + } else { + bufferedInput1 = new UnsyncBufferedReader(input1); + } + + final UnsyncBufferedReader bufferedInput2; + if (input2 instanceof UnsyncBufferedReader) { + bufferedInput2 = (UnsyncBufferedReader) input2; + } else { + bufferedInput2 = new UnsyncBufferedReader(input2); + } + + // 0 for last be '\r'. + // 1 for last be '\n'. (or '\r' but started a new line.) + // 2 for last be other chars. + int lastState1 = 1; + int lastState2 = 1; + + int currentChar1; + int currentChar2; + + while (true) { + currentChar1 = bufferedInput1.peek(); + currentChar2 = bufferedInput2.peek(); + + if (currentChar1 == EOF) { + // if input1 ended. + if (currentChar2 == EOF) { + // if both input1 and input2 ended here, we just return true. + return true; + } else { + // if input2 not ended. + switch (lastState1) { + case 0: + // if last state of input1 is "\r" + case 1: + // if last state of input1 is "new line" + switch (lastState2) { + case 0: + // if last state of input2 is "\r" + // if the next char of input2 is '\n', then it become "\r\n". + // if no other more chars, then it equals, + // as input1 ends with "\r", input2 ends with "\r\n", and both of them ended. + // otherwise illegal. + if (currentChar2 == '\n') { + bufferedInput2.eat(); + return bufferedInput2.peek() == EOF; + } + return false; + default: + // if last state of input2 is "new line" or "normal" + // and input1 ends, so input1 and input2 differ, thus illegal. + return false; + } + case 2: + // if last state of input1 is "normal" + switch (lastState2) { + case 2: + // if last state of input2 is "normal" + switch (currentChar2) { + case '\r': + // if the next of input2 is '\r' or "\r\n", then legal. + // otherwise illegal. 
+ bufferedInput2.eat(); + currentChar2 = bufferedInput2.peek(); + if (currentChar2 == EOF) { + return true; + } else if (currentChar2 == '\n') { + bufferedInput2.eat(); + return bufferedInput2.peek() == EOF; + } + return false; + case '\n': + // if the next of input2 is '\n' then legal. + // otherwise illegal. + bufferedInput2.eat(); + return bufferedInput2.peek() == EOF; + default: + // illegal + return false; + } + default: + // illegal + return false; + } + default: + // shall never enter + } + } + } else if (currentChar2 == EOF) { + // if input1 not ended, input2 ended. + // mirror to above, so please see comments above. + switch (lastState2) { + case 0: + case 1: + switch (lastState1) { + case 0: + if (currentChar1 == '\n') { + bufferedInput1.eat(); + return bufferedInput1.peek() == EOF; + } + return false; + default: + return false; + } + case 2: + switch (lastState1) { + case 2: + switch (currentChar1) { + case '\r': + bufferedInput1.eat(); + currentChar1 = bufferedInput1.peek(); + if (currentChar1 == EOF) { + return true; + } else if (currentChar1 == '\n') { + bufferedInput1.eat(); + return bufferedInput1.peek() == EOF; + } + return false; + case '\n': + bufferedInput1.eat(); + return bufferedInput1.peek() == EOF; + default: + return false; + } + default: + return false; + } + default: + // shall never enter + } + } + // if both input1 and input2 not end we can enter here. + switch (currentChar1) { + case '\r': + // if input1's next is '\r' + switch (currentChar2) { + case '\r': + // if input2's next is '\r' + // then both lastState be R. + lastState1 = lastState2 = 0; + bufferedInput1.eat(); + bufferedInput2.eat(); + continue; + case '\n': + // if input2's next is '\n' + // then we check whether input1's next next is '\n' + // if so, then nowCheck1++, means consume an additional char this turn, + // as it becomes "\r\n" in input1. + // otherwise, we do not consume the additional char, but it is still legal, + // because input1 ends with "\r". 
+ bufferedInput1.eat(); + currentChar1 = bufferedInput1.peek(); + lastState1 = lastState2 = 1; + bufferedInput2.eat(); + if (currentChar1 == EOF) { + continue; + } + if (currentChar1 == '\n') { + bufferedInput1.eat(); + } + continue; + default: + // if input2's next is normal. + // illegal. + return false; + } + case '\n': + // if input1's next is '\n' + switch (currentChar2) { + case '\n': + // if input2's next is '\n' + lastState1 = lastState2 = 1; + bufferedInput1.eat(); + bufferedInput2.eat(); + continue; + case '\r': + // if input2's next is '\r' + // then we check whether input2's next next is '\n' + // if so, then nowCheck2++, means consume an additional char this turn, + // as it becomes "\r\n" in input2. + // otherwise, we do not consume the additional char, but it is still legal, + // because input2 ends with "\r". + bufferedInput2.eat(); + currentChar2 = bufferedInput2.peek(); + lastState1 = lastState2 = 1; + bufferedInput1.eat(); + if (currentChar2 == EOF) { + continue; + } + if (currentChar2 == '\n') { + bufferedInput2.eat(); + } + continue; + default: + // if input2's next is normal. + // if input1's last is '\r', then it can become "\r\n", then legal. + // otherwise illegal. + if (lastState1 == 0) { + lastState1 = 1; + bufferedInput1.eat(); + continue; + } else { + return false; + } + } + default: + // if input1's next is normal. + switch (currentChar2) { + case '\n': + // if input2's next is '\n'. + // if input2's last is '\r', then it can become "\r\n", then legal. + // otherwise illegal. + if (lastState2 == 0) { + lastState2 = 1; + bufferedInput2.eat(); + continue; + } else { + return false; + } + case '\r': + // if input2's next is '\r'. + // illegal. + return false; + default: + // if input2's next is normal. + // if equal then legal. + // otherwise illegal. 
+ if (currentChar1 != currentChar2) { + return false; + } + lastState1 = lastState2 = 2; + bufferedInput1.eat(); + bufferedInput2.eat(); + continue; + } + } + } + } +} \ No newline at end of file