From 1f7750a403c6f50e7eae95b1de199067813ab1e7 Mon Sep 17 00:00:00 2001 From: XenoAmess Date: Wed, 10 Feb 2021 00:19:06 +0800 Subject: [PATCH] [IO-670] refine IOUtils.contentEquals(Reader, Reader) --- .../java/org/apache/commons/io/IOUtils.java | 91 ++++- .../buffer/LineEndUnifiedBufferedReader.java | 328 ++++++++++++++++++ .../LineEndUnifiedBufferedReaderTest.java | 186 ++++++++++ ...ontentEqualsIgnoreEOLReadersBenchmark.java | 290 ++++++++++++++++ 4 files changed, 887 insertions(+), 8 deletions(-) create mode 100644 src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java create mode 100644 src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java create mode 100644 src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsIgnoreEOLReadersBenchmark.java diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 1d3aa0e17d7..63b03648f8b 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -51,6 +51,7 @@ import java.util.function.Consumer; import org.apache.commons.io.function.IOConsumer; +import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader; import org.apache.commons.io.output.AppendableWriter; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.NullOutputStream; @@ -897,16 +898,90 @@ public static boolean contentEqualsIgnoreEOL(final Reader reader1, final Reader if (reader1 == null ^ reader2 == null) { return false; } - final BufferedReader br1 = toBufferedReader(reader1); - final BufferedReader br2 = toBufferedReader(reader2); - String line1 = br1.readLine(); - String line2 = br2.readLine(); - while (line1 != null && line1.equals(line2)) { - line1 = br1.readLine(); - line2 = br2.readLine(); + final LineEndUnifiedBufferedReader bufferedInput1; + if (reader1 instanceof LineEndUnifiedBufferedReader) { + bufferedInput1 = 
(LineEndUnifiedBufferedReader) reader1; + } else { + bufferedInput1 = new LineEndUnifiedBufferedReader(reader1); } - return Objects.equals(line1, line2); + + final LineEndUnifiedBufferedReader bufferedInput2; + if (reader2 instanceof LineEndUnifiedBufferedReader) { + bufferedInput2 = (LineEndUnifiedBufferedReader) reader2; + } else { + bufferedInput2 = new LineEndUnifiedBufferedReader(reader2); + } + + /* + * We use this variable to mark if last char be '\n'. + * Because "a" and "a\n" is thought contentEqualsIgnoreEOL, + * but "\n" and "\n\n" is thought not contentEqualsIgnoreEOL. + */ + boolean justNewLine = true; + + int currentChar1; + int currentChar2; + + while (true) { + currentChar1 = bufferedInput1.peek(); + currentChar2 = bufferedInput2.peek(); + + if (currentChar1 == EOF) { + if (currentChar2 == EOF) { + return true; + } else { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF( bufferedInput2, currentChar2); + } + return false; + } + } else if (currentChar2 == EOF) { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1); + } + return false; + } + if (currentChar1 != currentChar2) { + return false; + } + justNewLine = currentChar1 == '\n'; + bufferedInput1.eat(); + bufferedInput2.eat(); + } + } + + /** + * private function used only in contentEqualsIgnoreEOL. + * used in contentEqualsIgnoreEOL to detect whether a input only have CRLF or EOF. + * @param input input reader + * @param currentChar current peek char of input + * @return true/false + * @throws IOException by input.read(), not me. + * @see #contentEqualsIgnoreEOL(Reader, Reader) + */ + private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input, int currentChar) throws IOException { + + /* + * logically there should be some code like + * + * if (char1 == EOF) { + * return true; + * } + * + * here. + * + * But actually, if this input's read() is EOF, then we will not invoke this function at all. + * So the check is deleted. 
+ * + * You can go contentEqualsIgnoreEOL for details. + */ + + if (currentChar == '\n') { + input.eat(); + return input.read() == EOF; + } + return false; } /** diff --git a/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java b/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java new file mode 100644 index 00000000000..e1f1c061a76 --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReader.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.IOException; +import java.io.Reader; +import org.apache.commons.io.IOUtils; +import static org.apache.commons.io.IOUtils.EOF; + +/** + * A NonThreadSafeButFastBufferedReader who use some filters to make line ends unified. + * + * + */ +public class LineEndUnifiedBufferedReader extends Reader { + + private final Reader reader; + + private final char[] charBuffer; + + private int nowIndex = 0; + + private int nowLimit = 0; + + private boolean cachedCR; + + /** + * Creates a new instance, which filters the given reader, and + * uses the given buffer size. 
+     *
+     * @param reader The original reader, which is being buffered.
+     * @param charBufferSize size of the buffer; use at least 2, a smaller buffer leaves no room to refill behind a cached '\r'.
+     */
+    public LineEndUnifiedBufferedReader(Reader reader, int charBufferSize) {
+        this(reader, new char[charBufferSize]);
+    }
+
+    /**
+     * Creates a new instance, which filters the given reader, and
+     * uses IOUtils.DEFAULT_BUFFER_SIZE.
+     *
+     * @param reader The original reader, which is being buffered.
+     * @see IOUtils#DEFAULT_BUFFER_SIZE
+     */
+    public LineEndUnifiedBufferedReader(Reader reader) {
+        this(reader, IOUtils.DEFAULT_BUFFER_SIZE);
+    }
+
+    /**
+     * Creates a new instance, which filters the given reader, and
+     * uses the given buffer.
+     *
+     * @param reader The original reader, which is being buffered.
+     * @param charBuffer buffer used (not copied); use at least 2 chars, see the size-based constructor.
+     */
+    public LineEndUnifiedBufferedReader(Reader reader, char[] charBuffer) {
+        this.reader = reader;
+        this.charBuffer = charBuffer;
+    }
+
+    /**
+     * {@inheritDoc} Line ends are unified to '\n'; returns 0 when len is not positive or the refill held only a lone '\r'.
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        if (len <= 0) {
+            return 0;
+        }
+        final char[] charBufferLocal = this.getCharBuffer();
+        final Reader readerLocal = this.getReader();
+        int nowLimitLocal = this.getNowLimit();
+        int nowIndexLocal = this.getNowIndex();
+        int currentBufferSize = nowLimitLocal - nowIndexLocal;
+
+        if (currentBufferSize == 0) {
+            nowLimitLocal = nowIndexLocal = 0;
+            if (this.cachedCR) {
+                charBufferLocal[nowLimitLocal++] = '\r'; // put the held-back '\r' in front of the new data
+            }
+
+            int readLength;
+            do {
+                readLength = readerLocal.read(charBufferLocal, nowLimitLocal, charBufferLocal.length - nowLimitLocal); // refill BEHIND the re-inserted '\r', never over it
+            } while (readLength == 0);
+            if (readLength == EOF) {
+                if (this.cachedCR) {
+                    this.cachedCR = false;
+                    cbuf[off] = charBufferLocal[nowIndexLocal++] = '\n'; // a trailing lone '\r' is delivered as '\n'
+                    this.setNowIndex(nowIndexLocal);
+                    this.setNowLimit(nowLimitLocal);
+                    return 1;
+                }
+                this.setNowIndex(nowIndexLocal);
+                this.setNowLimit(nowLimitLocal);
+                return EOF;
+            }
+            nowLimitLocal += readLength;
+
+            if (charBufferLocal[nowLimitLocal - 1] == '\r') {
+                --nowLimitLocal; // hold the last '\r' back: the next refill decides whether it is "\r\n" or a lone '\r'
+                this.cachedCR = true;
+            } else {
+                this.cachedCR = false;
+            }
+            this.setNowLimit(nowLimitLocal);
+            if (nowLimitLocal == 0) {
+                this.setNowIndex(nowIndexLocal);
+                return 0;
+            }
+            this.filter();
+            nowLimitLocal = this.getNowLimit();
+            nowIndexLocal = this.getNowIndex();
+            currentBufferSize = nowLimitLocal - nowIndexLocal;
+        }
+        if (currentBufferSize <= len) {
+            System.arraycopy(charBufferLocal, nowIndexLocal, cbuf, off, currentBufferSize);
+            nowLimitLocal = nowIndexLocal = 0;
+            this.setNowIndex(nowIndexLocal);
+            this.setNowLimit(nowLimitLocal);
+            return currentBufferSize;
+        } else {
+            System.arraycopy(charBufferLocal, nowIndexLocal, cbuf, off, len);
+            nowIndexLocal += len;
+            this.setNowIndex(nowIndexLocal);
+            this.setNowLimit(nowLimitLocal);
+            return len;
+        }
+    }
+
+    /**
+     * Returns the next line-end-unified char without consuming it (refilling the buffer if it is empty), or EOF.
+     */
+    public int peek() throws IOException {
+        final char[] charBufferLocal = this.getCharBuffer();
+        final Reader readerLocal = this.getReader();
+        int nowLimitLocal = this.getNowLimit();
+        int nowIndexLocal = this.getNowIndex();
+
+        int currentBufferSize = nowLimitLocal - nowIndexLocal;
+        if (currentBufferSize == 0) {
+            nowLimitLocal = nowIndexLocal = 0;
+            if (this.cachedCR) {
+                charBufferLocal[nowLimitLocal++] = '\r'; // put the held-back '\r' in front of the new data
+            }
+
+            int readLength;
+            do {
+                readLength = readerLocal.read(charBufferLocal, nowLimitLocal, charBufferLocal.length - nowLimitLocal); // refill BEHIND the re-inserted '\r', never over it
+            } while (readLength == 0);
+            if (readLength == EOF) {
+                if (this.cachedCR) {
+                    this.cachedCR = false;
+                    this.setNowIndex(nowIndexLocal);
+                    this.setNowLimit(nowLimitLocal);
+                    return charBufferLocal[nowIndexLocal] = '\n'; // a trailing lone '\r' is delivered as '\n'
+                }
+                this.setNowIndex(nowIndexLocal);
+                this.setNowLimit(nowLimitLocal);
+                return EOF;
+            }
+            nowLimitLocal += readLength;
+
+            if (charBufferLocal[nowLimitLocal - 1] == '\r') {
+                --nowLimitLocal; // hold the last '\r' back: the next refill decides whether it is "\r\n" or a lone '\r'
+                this.cachedCR = true;
+            } else {
+                this.cachedCR = false;
+            }
+            this.setNowLimit(nowLimitLocal);
+            if (nowLimitLocal == 0) {
+                this.setNowIndex(nowIndexLocal);
+                return this.peek(); // only a lone '\r' was read; refill again with it cached
+            }
+            this.filter();
+            nowLimitLocal = this.getNowLimit();
+            nowIndexLocal = this.getNowIndex();
+        }
+        return charBufferLocal[nowIndexLocal];
+    }
+
+    /**
+     * Make sure chars in the charBuffer have no '\r'.
+     * "\r\n" in original reader will become "\n",
+     * "\n" in original reader will become "\n",
+     * "\r" with normal character behind in original reader will become "\n".
+     * Other chars should not change.
+     * The buffer is compacted towards its end; this.nowIndex is then moved to the first remaining char.
+     */
+    private void filter() {
+        final char[] charBufferLocal = this.getCharBuffer();
+
+        int i = this.getNowLimit() - 1;
+        int j = i;
+        if (i >= 0) {
+            for (; i >= 0; --i, --j) {
+                if (charBufferLocal[i] == '\n') {
+                    charBufferLocal[j] = '\n';
+                    final int i_1 = i - 1;
+                    if (i_1 >= 0 && charBufferLocal[i_1] == '\r') {
+                        --i; // swallow the '\r' of a "\r\n" pair
+                    }
+                } else if (charBufferLocal[i] == '\r') {
+                    charBufferLocal[j] = '\n';
+                } else {
+                    charBufferLocal[j] = charBufferLocal[i];
+                }
+            }
+            this.setNowIndex(j + 1);
+        }
+    }
+
+    /**
+     * getter for this.cachedCR
+     * @return this.cachedCR
+     */
+    public boolean isCachedCR() {
+        return this.cachedCR;
+    }
+
+    /**
+     * setter for this.cachedCR
+     * @param cachedCR this.cachedCR
+     */
+    public void setCachedCR(boolean cachedCR) {
+        this.cachedCR = cachedCR;
+    }
+
+    /**
+     * Marks the current char as read.
+     * Must be called only after invoking peek.
+ * + * @see #read() + * @see #peek() + */ + public void eat() { + this.nowIndex++; + } + + /** + * {@inheritDoc} + */ + @Override + public int read() throws IOException { + int res = this.peek(); + if (res != EOF) { + eat(); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void close() throws IOException { + if (this.reader != null) { + this.reader.close(); + } + } + + /** + * getter for this.reader + * @return this.reader + */ + public Reader getReader() { + return this.reader; + } + + /** + * getter for this.charBuffer + * @return this.charBuffer + */ + public char[] getCharBuffer() { + return this.charBuffer; + } + + /** + * getter for this.nowIndex + * @return this.nowIndex + */ + public int getNowIndex() { + return this.nowIndex; + } + + /** + * setter for this.nowIndex + * @param nowIndex this.nowIndex + */ + public void setNowIndex(int nowIndex) { + this.nowIndex = nowIndex; + } + + /** + * getter for this.nowLimit + * @return this.nowLimit + */ + public int getNowLimit() { + return this.nowLimit; + } + + /** + * setter for this.nowLimit + * @param nowLimit this.nowLimit + */ + public void setNowLimit(int nowLimit) { + this.nowLimit = nowLimit; + } + +} diff --git a/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java b/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java new file mode 100644 index 00000000000..227974a18f3 --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/buffer/LineEndUnifiedBufferedReaderTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input.buffer; + +import java.io.CharArrayReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.Random; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * modified from NonThreadSafeButFastBufferedReaderTest + */ +public class LineEndUnifiedBufferedReaderTest { + /** + * Always using the same seed should ensure a reproducable test. + */ + private final Random rnd = new Random(1530960934483L); + + @Test + public void testRandomRead() throws Exception { + final char[] inputBuffer = newInputBuffer(); + final char[] bufferCopy = new char[inputBuffer.length]; + final CharArrayReader bais = new CharArrayReader(inputBuffer); + @SuppressWarnings("resource") final LineEndUnifiedBufferedReader cbis = + new LineEndUnifiedBufferedReader(bais, 253); + int offset = 0; + final char[] readBuffer = new char[256]; + while (offset < bufferCopy.length) { + switch (rnd.nextInt(2)) { + case 0: { + final int res = cbis.read(); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF at offset " + offset); + } + if (inputBuffer[offset] != res) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + + ", got " + res); + } + ++offset; + break; + } + case 1: { + final int res = cbis.read(readBuffer, 0, rnd.nextInt(readBuffer.length + 1)); + if (res == IOUtils.EOF) { + throw new IllegalStateException("Unexpected EOF 
at offset " + offset); + } else if (res == 0) { + throw new IllegalStateException("Unexpected zero-byte-result at offset " + offset); + } else { + for (int i = 0; i < res; i++) { + if (inputBuffer[offset] != readBuffer[i]) { + throw new IllegalStateException("Expected " + inputBuffer[offset] + " at offset " + offset + ", got " + readBuffer[i]); + } + ++offset; + } + } + break; + } + default: + throw new IllegalStateException("Unexpected random choice value"); + } + } + bais.close(); + cbis.close(); + } + + @Test + public void testClose() throws Exception { + LineEndUnifiedBufferedReader b = new LineEndUnifiedBufferedReader(null); + closeSeveralTimes(b); + LineEndUnifiedBufferedReader b2 = + new LineEndUnifiedBufferedReader(new StringReader("")); + closeSeveralTimes(b2); + } + + private void closeSeveralTimes(LineEndUnifiedBufferedReader b) throws IOException { + b.close(); + b.close(); + b.close(); + b.close(); + b.close(); + } + + @Test + public void testFullRead() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + while (b.read() != IOUtils.EOF) { + } + } + + @Test + public void testFullReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + while (true) { + final int res = b.read(buffer, 0, buffer.length); + if (res == IOUtils.EOF) { + break; + } + } + } + + @Test + public void testWeirdReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("aaaaa")); + final char[] buffer = new char[5]; + int res; + res = b.read(buffer, 0, 0); + assertEquals(res, 0); + res = b.read(buffer, 0, -20); + assertEquals(res, 0); + } + + /** + * Create a large, but random input buffer. + * Do not test `\r` problems in this test. 
+ * `\r` problems are specially tested in IOUtilsTestCase.testContentEqualsIgnoreEOL + * @see org.apache.commons.io.IOUtilsTestCase#testContentEqualsIgnoreEOL() + */ + private char[] newInputBuffer() { + final char[] buffer = new char[16 * 512 + rnd.nextInt(512)]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (char) rnd.nextInt(); + while (buffer[i] == '\r') { + buffer[i] = (char) rnd.nextInt(); + } + } + return buffer; + } + + @Test + public void testCachedCR_ReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("")); + b.setCachedCR(true); + char[] chars = new char[5]; + Assertions.assertEquals(b.read(chars), 1); + assertEquals('\n', chars[0]); + } + + @Test + public void testCachedCR_Read() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("")); + b.setCachedCR(true); + Assertions.assertEquals('\n', b.read()); + } + + @Test + public void testCR_ReadArray() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("\r")); + char[] chars = new char[5]; + Assertions.assertEquals(0, b.read(chars)); + Assertions.assertTrue(b.isCachedCR()); + Assertions.assertEquals(1, b.read(chars)); + assertEquals('\n', chars[0]); + } + + @Test + public void testCR_Read() throws Exception { + LineEndUnifiedBufferedReader b = + new LineEndUnifiedBufferedReader(new StringReader("\r")); + Assertions.assertEquals('\n', b.read()); + } +} diff --git a/src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsIgnoreEOLReadersBenchmark.java b/src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsIgnoreEOLReadersBenchmark.java new file mode 100644 index 00000000000..32a16f3386d --- /dev/null +++ b/src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsIgnoreEOLReadersBenchmark.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.io.jmh; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader; +import org.apache.commons.lang3.StringUtils; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import static org.apache.commons.io.IOUtils.EOF; +import static org.apache.commons.io.IOUtils.toBufferedReader; + +/** + * Test different implementations of {@link IOUtils#contentEqualsIgnoreEOL(Reader, Reader)}. + * + *
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testFileCurrent                                      avgt    5      510173.062 ±      4124.634  ns/op
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testFilePr118                                        avgt    5      513733.905 ±      6157.818  ns/op
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testFileRelease_2_8_0                                avgt    5      498785.100 ±      9845.248  ns/op
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testStringCurrent                                    avgt    5  1708154223.333 ± 104024141.073  ns/op
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testStringPr118                                      avgt    5  1714266053.333 ±  44126767.233  ns/op
+ * IOUtilsContentEqualsIgnoreEOLReadersBenchmark.testStringRelease_2_8_0                              avgt    5  4237073486.667 ± 217596541.348  ns/op
+ * 
+ */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS) +@Fork(value = 1, jvmArgs = {"-server"}) +public class IOUtilsContentEqualsIgnoreEOLReadersBenchmark { + + private static final int STRING_LEN = 1 << 24; + private static final String TEST_PATH_A = "/org/apache/commons/io/testfileBOM.xml"; + private static final String TEST_PATH_16K_A = "/org/apache/commons/io/abitmorethan16k.txt"; + private static final String TEST_PATH_16K_A_COPY = "/org/apache/commons/io/abitmorethan16kcopy.txt"; + private static final String TEST_PATH_B = "/org/apache/commons/io/testfileNoBOM.xml"; + private static final Charset DEFAULT_CHARSET = Charset.defaultCharset(); + static String[] STRINGS = new String[5]; + + static { + STRINGS[0] = StringUtils.repeat("ab", STRING_LEN); + STRINGS[1] = STRINGS[0] + 'c'; + STRINGS[2] = STRINGS[0] + 'd'; + STRINGS[3] = StringUtils.repeat("ab\rab\n", STRING_LEN); + STRINGS[4] = StringUtils.repeat("ab\r\nab\r", STRING_LEN); + } + + static String SPECIAL_CASE_STRING_0 = StringUtils.repeat(StringUtils.repeat("ab", STRING_LEN) + '\n', 2); + static String SPECIAL_CASE_STRING_1 = StringUtils.repeat(StringUtils.repeat("cd", STRING_LEN) + '\n', 2); + + @SuppressWarnings("resource") + public static boolean contentEqualsIgnoreEOL_release_2_8_0(final Reader reader1, final Reader reader2) throws IOException { + if (reader1 == reader2) { + return true; + } + if (reader1 == null ^ reader2 == null) { + return false; + } + final BufferedReader br1 = toBufferedReader(reader1); + final BufferedReader br2 = toBufferedReader(reader2); + + String line1 = br1.readLine(); + String line2 = br2.readLine(); + while (line1 != null && line1.equals(line2)) { + line1 = br1.readLine(); + line2 = br2.readLine(); + } + return Objects.equals(line1, line2); + } + + public static boolean 
contentEqualsIgnoreEOLPr118(final Reader reader1, final Reader reader2) + throws IOException { + if (reader1 == reader2) { + return true; + } + if (reader1 == null ^ reader2 == null) { + return false; + } + + final LineEndUnifiedBufferedReader bufferedInput1; + if (reader1 instanceof LineEndUnifiedBufferedReader) { + bufferedInput1 = (LineEndUnifiedBufferedReader) reader1; + } else { + bufferedInput1 = new LineEndUnifiedBufferedReader(reader1); + } + + final LineEndUnifiedBufferedReader bufferedInput2; + if (reader2 instanceof LineEndUnifiedBufferedReader) { + bufferedInput2 = (LineEndUnifiedBufferedReader) reader2; + } else { + bufferedInput2 = new LineEndUnifiedBufferedReader(reader2); + } + + /* + * We use this variable to mark if last char be '\n'. + * Because "a" and "a\n" is thought contentEqualsIgnoreEOL, + * but "\n" and "\n\n" is thought not contentEqualsIgnoreEOL. + */ + boolean justNewLine = true; + + int currentChar1; + int currentChar2; + + while (true) { + currentChar1 = bufferedInput1.peek(); + currentChar2 = bufferedInput2.peek(); + + if (currentChar1 == EOF) { + if (currentChar2 == EOF) { + return true; + } else { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF( bufferedInput2, currentChar2); + } + return false; + } + } else if (currentChar2 == EOF) { + if (!justNewLine) { + return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1); + } + return false; + } + if (currentChar1 != currentChar2) { + return false; + } + justNewLine = currentChar1 == '\n'; + bufferedInput1.eat(); + bufferedInput2.eat(); + } + } + + /** + * private function used only in contentEqualsIgnoreEOL. + * used in contentEqualsIgnoreEOL to detect whether a input only have CRLF or EOF. + * @param input input reader + * @param currentChar current peek char of input + * @return true/false + * @throws IOException by input.read(), not me. 
+ * @see #contentEqualsIgnoreEOL(Reader, Reader) + */ + private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input, int currentChar) throws IOException { + + /* + * logically there should be some code like + * + * if (char1 == EOF) { + * return true; + * } + * + * here. + * + * But actually, if this input's read() is EOF, then we will not invoke this function at all. + * So the check is deleted. + * + * You can go contentEqualsIgnoreEOL for details. + */ + + if (currentChar == '\n') { + input.eat(); + return input.read() == EOF; + } + return false; + } + + + @Benchmark + public boolean[] testFileCurrent() throws IOException { + final boolean[] res = new boolean[3]; + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) { + res[0] = IOUtils.contentEqualsIgnoreEOL(input1, input1); + } + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) { + res[1] = IOUtils.contentEqualsIgnoreEOL(input1, input2); + } + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY), + DEFAULT_CHARSET)) { + res[2] = IOUtils.contentEqualsIgnoreEOL(input1, input2); + } + return res; + } + + @Benchmark + public boolean[] testFilePr118() throws IOException { + final boolean[] res = new boolean[3]; + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) { + res[0] = contentEqualsIgnoreEOLPr118(input1, input1); + } + try (Reader input1 = new 
InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) { + res[1] = contentEqualsIgnoreEOLPr118(input1, input2); + } + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A)); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY))) { + res[2] = contentEqualsIgnoreEOLPr118(input1, input2); + } + return res; + } + + @Benchmark + public boolean[] testFileRelease_2_8_0() throws IOException { + final boolean[] res = new boolean[3]; + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) { + res[0] = contentEqualsIgnoreEOL_release_2_8_0(input1, input1); + } + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) { + res[1] = contentEqualsIgnoreEOL_release_2_8_0(input1, input2); + } + try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A), DEFAULT_CHARSET); + Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY), + DEFAULT_CHARSET)) { + res[2] = contentEqualsIgnoreEOL_release_2_8_0(input1, input2); + } + return res; + } + + @Benchmark + public void testStringCurrent(final Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (StringReader input1 = new StringReader(STRINGS[i]); + StringReader input2 = new StringReader(STRINGS[j])) { + blackhole.consume(IOUtils.contentEqualsIgnoreEOL(input1, input2)); + } + } + } + } + + @Benchmark + public void testStringPr118(final Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + 
for (int j = 0; j < 5; j++) { + try (StringReader input1 = new StringReader(STRINGS[i]); + StringReader input2 = new StringReader(STRINGS[j])) { + blackhole.consume(contentEqualsIgnoreEOLPr118(input1, input2)); + } + } + } + } + + @Benchmark + public void testStringRelease_2_8_0(final Blackhole blackhole) throws IOException { + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + try (StringReader input1 = new StringReader(STRINGS[i]); + StringReader input2 = new StringReader(STRINGS[j])) { + blackhole.consume(contentEqualsIgnoreEOL_release_2_8_0(input1, input2)); + } + } + } + } + +}