From 394e76c48cd70166280e8d4b4967abe0de6ce308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20R=C5=BCysko?= Date: Sun, 28 Apr 2024 08:56:52 +0200 Subject: [PATCH] Add schema-based parsing (#43) --- .gitignore | 1 + build.gradle | 36 +- .../org/simdjson/NumberParserBenchmark.java | 4 +- .../org/simdjson/ParseAndSelectBenchmark.java | 31 +- .../SchemaBasedParseAndSelectBenchmark.java | 123 ++ src/main/java/org/simdjson/BitIndexes.java | 37 +- src/main/java/org/simdjson/ClassResolver.java | 24 + .../org/simdjson/ConstructorArgument.java | 4 + .../org/simdjson/ConstructorArgumentsMap.java | 91 ++ src/main/java/org/simdjson/DoubleParser.java | 505 ++++++ .../java/org/simdjson/ExponentParser.java | 94 ++ src/main/java/org/simdjson/FloatParser.java | 504 ++++++ src/main/java/org/simdjson/JsonIterator.java | 43 +- .../org/simdjson/JsonParsingException.java | 4 + src/main/java/org/simdjson/JsonValue.java | 10 +- src/main/java/org/simdjson/NumberParser.java | 774 +++------- .../java/org/simdjson/NumberParserTables.java | 2 + .../org/simdjson/OnDemandJsonIterator.java | 675 ++++++++ src/main/java/org/simdjson/ResolvedClass.java | 165 ++ .../org/simdjson/SchemaBasedJsonIterator.java | 735 +++++++++ .../java/org/simdjson/SimdJsonParser.java | 19 +- src/main/java/org/simdjson/StringParser.java | 75 +- .../java/org/simdjson/StructuralIndexer.java | 5 +- src/main/java/org/simdjson/TapeBuilder.java | 41 +- .../simdjson/annotations/JsonFieldName.java | 13 + .../java/org/simdjson/ArrayParsingTest.java | 245 +++ .../simdjson/ArraySchemaBasedParsingTest.java | 503 ++++++ .../simdjson/BenchmarkCorrectnessTest.java | 65 +- .../java/org/simdjson/BooleanParsingTest.java | 121 ++ .../BooleanSchemaBasedParsingTest.java | 593 +++++++ ...tingPointNumberSchemaBasedParsingTest.java | 1297 ++++++++++++++++ .../IntegralNumberSchemaBasedParsingTest.java | 779 ++++++++++ .../java/org/simdjson/NullParsingTest.java | 106 ++ .../java/org/simdjson/NumberParsingTest.java | 153 +- 
.../java/org/simdjson/ObjectParsingTest.java | 72 +- .../ObjectSchemaBasedParsingTest.java | 821 ++++++++++ .../java/org/simdjson/SimdJsonParserTest.java | 323 ---- .../java/org/simdjson/StringParsingTest.java | 219 ++- .../StringSchemaBasedParsingTest.java | 1357 +++++++++++++++++ .../schemas/ClassWithIntegerField.java | 16 + .../ClassWithPrimitiveBooleanField.java | 16 + .../schemas/ClassWithPrimitiveByteField.java | 16 + .../ClassWithPrimitiveCharacterField.java | 16 + .../ClassWithPrimitiveDoubleField.java | 16 + .../schemas/ClassWithPrimitiveFloatField.java | 16 + .../ClassWithPrimitiveIntegerField.java | 16 + .../schemas/ClassWithPrimitiveLongField.java | 16 + .../schemas/ClassWithPrimitiveShortField.java | 16 + .../schemas/ClassWithStringField.java | 16 + .../schemas/RecordWithBooleanArrayField.java | 4 + .../schemas/RecordWithBooleanField.java | 5 + .../schemas/RecordWithBooleanListField.java | 6 + .../schemas/RecordWithByteArrayField.java | 4 + .../simdjson/schemas/RecordWithByteField.java | 5 + .../schemas/RecordWithByteListField.java | 6 + .../RecordWithCharacterArrayField.java | 4 + .../schemas/RecordWithCharacterField.java | 5 + .../schemas/RecordWithCharacterListField.java | 6 + .../schemas/RecordWithDoubleArrayField.java | 4 + .../schemas/RecordWithDoubleField.java | 5 + .../schemas/RecordWithDoubleListField.java | 6 + .../schemas/RecordWithFloatArrayField.java | 4 + .../schemas/RecordWithFloatField.java | 5 + .../schemas/RecordWithFloatListField.java | 6 + .../schemas/RecordWithIntegerArrayField.java | 4 + .../schemas/RecordWithIntegerField.java | 5 + .../schemas/RecordWithIntegerListField.java | 6 + .../schemas/RecordWithLongArrayField.java | 4 + .../simdjson/schemas/RecordWithLongField.java | 5 + .../schemas/RecordWithLongListField.java | 6 + .../RecordWithPrimitiveBooleanArrayField.java | 4 + .../RecordWithPrimitiveBooleanField.java | 5 + .../RecordWithPrimitiveByteArrayField.java | 4 + .../schemas/RecordWithPrimitiveByteField.java | 5 + 
...ecordWithPrimitiveCharacterArrayField.java | 4 + .../RecordWithPrimitiveCharacterField.java | 5 + .../RecordWithPrimitiveDoubleArrayField.java | 4 + .../RecordWithPrimitiveDoubleField.java | 5 + .../RecordWithPrimitiveFloatArrayField.java | 4 + .../RecordWithPrimitiveFloatField.java | 5 + .../RecordWithPrimitiveIntegerArrayField.java | 4 + .../RecordWithPrimitiveIntegerField.java | 5 + .../RecordWithPrimitiveLongArrayField.java | 4 + .../schemas/RecordWithPrimitiveLongField.java | 5 + .../RecordWithPrimitiveShortArrayField.java | 5 + .../RecordWithPrimitiveShortField.java | 5 + .../schemas/RecordWithShortArrayField.java | 4 + .../schemas/RecordWithShortField.java | 5 + .../schemas/RecordWithShortListField.java | 6 + .../schemas/RecordWithStringArrayField.java | 4 + .../schemas/RecordWithStringField.java | 5 + .../schemas/RecordWithStringListField.java | 6 + .../simdjson/testutils/CartesianTestCsv.java | 16 + .../CartesianTestCsvArgumentsProvider.java | 25 + .../testutils/CartesianTestCsvRow.java | 39 + .../FloatingPointNumberTestFile.java | 82 + .../FloatingPointNumberTestFilesProvider.java | 34 + .../FloatingPointNumberTestFilesSource.java | 26 + .../{ => testutils}/JsonValueAssert.java | 17 +- .../java/org/simdjson/testutils/MapEntry.java | 10 + .../org/simdjson/testutils/MapSource.java | 18 + .../simdjson/testutils/MapSourceProvider.java | 39 + .../simdjson/testutils/NumberTestData.java | 42 + .../RandomIntegralNumberProvider.java | 125 ++ .../testutils/RandomIntegralNumberSource.java | 23 + .../testutils/RandomStringProvider.java | 58 + .../testutils/RandomStringSource.java | 22 + .../SchemaBasedRandomValueProvider.java | 232 +++ .../SchemaBasedRandomValueSource.java | 23 + .../testutils/SimdJsonAssertions.java | 11 + .../simdjson/testutils/StringTestData.java | 118 ++ 111 files changed, 10911 insertions(+), 1086 deletions(-) create mode 100644 src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java create mode 100644 
src/main/java/org/simdjson/ClassResolver.java create mode 100644 src/main/java/org/simdjson/ConstructorArgument.java create mode 100644 src/main/java/org/simdjson/ConstructorArgumentsMap.java create mode 100644 src/main/java/org/simdjson/DoubleParser.java create mode 100644 src/main/java/org/simdjson/ExponentParser.java create mode 100644 src/main/java/org/simdjson/FloatParser.java create mode 100644 src/main/java/org/simdjson/OnDemandJsonIterator.java create mode 100644 src/main/java/org/simdjson/ResolvedClass.java create mode 100644 src/main/java/org/simdjson/SchemaBasedJsonIterator.java create mode 100644 src/main/java/org/simdjson/annotations/JsonFieldName.java create mode 100644 src/test/java/org/simdjson/ArrayParsingTest.java create mode 100644 src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java create mode 100644 src/test/java/org/simdjson/BooleanParsingTest.java create mode 100644 src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java create mode 100644 src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java create mode 100644 src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java create mode 100644 src/test/java/org/simdjson/NullParsingTest.java create mode 100644 src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java delete mode 100644 src/test/java/org/simdjson/SimdJsonParserTest.java create mode 100644 src/test/java/org/simdjson/StringSchemaBasedParsingTest.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithIntegerField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java create mode 100644 
src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java create mode 100644 src/test/java/org/simdjson/schemas/ClassWithStringField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithBooleanField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithByteField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithByteListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithCharacterField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithDoubleField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithFloatField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithFloatListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithIntegerField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java create mode 100644 
src/test/java/org/simdjson/schemas/RecordWithLongField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithLongListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithShortField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithShortListField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java create mode 100644 
src/test/java/org/simdjson/schemas/RecordWithStringField.java create mode 100644 src/test/java/org/simdjson/schemas/RecordWithStringListField.java create mode 100644 src/test/java/org/simdjson/testutils/CartesianTestCsv.java create mode 100644 src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java create mode 100644 src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java create mode 100644 src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java create mode 100644 src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java create mode 100644 src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java rename src/test/java/org/simdjson/{ => testutils}/JsonValueAssert.java (81%) create mode 100644 src/test/java/org/simdjson/testutils/MapEntry.java create mode 100644 src/test/java/org/simdjson/testutils/MapSource.java create mode 100644 src/test/java/org/simdjson/testutils/MapSourceProvider.java create mode 100644 src/test/java/org/simdjson/testutils/NumberTestData.java create mode 100644 src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java create mode 100644 src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java create mode 100644 src/test/java/org/simdjson/testutils/RandomStringProvider.java create mode 100644 src/test/java/org/simdjson/testutils/RandomStringSource.java create mode 100644 src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java create mode 100644 src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java create mode 100644 src/test/java/org/simdjson/testutils/SimdJsonAssertions.java create mode 100644 src/test/java/org/simdjson/testutils/StringTestData.java diff --git a/.gitignore b/.gitignore index 5241245..6b6051c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ build profilers testdata +hotspot_*.log \ No newline at end of file diff --git a/build.gradle b/build.gradle index 2cae88f..8fb175d 100644 --- 
a/build.gradle +++ b/build.gradle @@ -1,6 +1,7 @@ import me.champeau.jmh.JmhBytecodeGeneratorTask -import org.gradle.internal.os.OperatingSystem import org.ajoberstar.grgit.Grgit +import org.gradle.internal.os.OperatingSystem + import java.time.Duration plugins { @@ -42,20 +43,20 @@ java { } ext { - junitVersion = '5.9.1' - jsoniterScalaVersion = '2.24.4' + junitVersion = '5.10.2' + jsoniterScalaVersion = '2.28.4' } dependencies { - jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.16.0' - jmhImplementation group: 'com.alibaba.fastjson2', name: 'fastjson2', version: '2.0.42' - jmhImplementation group: 'com.jsoniter', name: 'jsoniter', version: '0.9.23' + jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.17.0' + jmhImplementation group: 'com.alibaba.fastjson2', name: 'fastjson2', version: '2.0.49' jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre' compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2' testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' + testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.2.0' testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion @@ -136,15 +137,21 @@ jmh { '--add-modules=jdk.incubator.vector' ] if (getBooleanProperty('jmh.profilersEnabled', false)) { + createDirIfDoesNotExist('./profilers') if (OperatingSystem.current().isLinux()) { - 
profilers = [ - 'perf', - 'perfasm:intelSyntax=true', - 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('LD_LIBRARY_PATH') + def profilerList = [ + 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getLibPath('LD_LIBRARY_PATH') ] + if (getBooleanProperty('jmh.jitLogEnabled', false)) { + createDirIfDoesNotExist('./profilers/perfasm') + profilerList += [ + 'perfasm:intelSyntax=true;saveLog=true;saveLogTo=./profilers/perfasm' + ] + } + profilers = profilerList } else if (OperatingSystem.current().isMacOsX()) { profilers = [ - 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('DYLD_LIBRARY_PATH') + 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getLibPath('DYLD_LIBRARY_PATH') ] } } @@ -218,6 +225,11 @@ def getBooleanProperty(String name, boolean defaultValue) { Boolean.valueOf((project.findProperty(name) ?: defaultValue) as String) } -static def getAsyncProfilerLibPath(String envVarName) { +static def getLibPath(String envVarName) { System.getenv(envVarName) ?: System.getProperty('java.library.path') } + +static createDirIfDoesNotExist(String dir) { + File file = new File(dir) + file.mkdirs() +} diff --git a/src/jmh/java/org/simdjson/NumberParserBenchmark.java b/src/jmh/java/org/simdjson/NumberParserBenchmark.java index 1b8c9dd..f73dd83 100644 --- a/src/jmh/java/org/simdjson/NumberParserBenchmark.java +++ b/src/jmh/java/org/simdjson/NumberParserBenchmark.java @@ -21,7 +21,7 @@ public class NumberParserBenchmark { private final Tape tape = new Tape(100); - private final NumberParser numberParser = new NumberParser(tape); + private final NumberParser numberParser = new NumberParser(); @Param({ "2.2250738585072013e-308", // fast path @@ -43,7 +43,7 @@ public double baseline() { @Benchmark public double simdjson() { tape.reset(); - numberParser.parseNumber(numberUtf8Bytes, 0); + 
numberParser.parseNumber(numberUtf8Bytes, 0, tape); return tape.getDouble(0); } } diff --git a/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java b/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java index a37135c..fcb056f 100644 --- a/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java +++ b/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java @@ -4,10 +4,6 @@ import com.alibaba.fastjson2.JSONObject; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.plokhotnyuk.jsoniter_scala.core.ReaderConfig$; -import com.github.plokhotnyuk.jsoniter_scala.core.package$; -import com.jsoniter.JsonIterator; -import com.jsoniter.any.Any; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Level; @@ -43,19 +39,7 @@ public void setup() throws IOException { buffer = is.readAllBytes(); bufferPadded = padded(buffer); } - } - - @Benchmark - public int countUniqueUsersWithDefaultProfile_jsoniter_scala() throws IOException { - Twitter twitter = package$.MODULE$.readFromArray(buffer, ReaderConfig$.MODULE$, Twitter$.MODULE$.codec()); - Set defaultUsers = new HashSet<>(); - for (Status tweet: twitter.statuses()) { - User user = tweet.user(); - if (user.default_profile()) { - defaultUsers.add(user.screen_name()); - } - } - return defaultUsers.size(); + System.out.println("VectorSpecies = " + StructuralIndexer.BYTE_SPECIES); } @Benchmark @@ -88,19 +72,6 @@ public int countUniqueUsersWithDefaultProfile_fastjson() { return defaultUsers.size(); } - @Benchmark - public int countUniqueUsersWithDefaultProfile_jsoniter() { - Any json = JsonIterator.deserialize(buffer); - Set defaultUsers = new HashSet<>(); - for (Any tweet : json.get("statuses")) { - Any user = tweet.get("user"); - if (user.get("default_profile").toBoolean()) { - defaultUsers.add(user.get("screen_name").toString()); - } - } - return defaultUsers.size(); - } - @Benchmark public 
int countUniqueUsersWithDefaultProfile_simdjson() { JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length); diff --git a/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java b/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java new file mode 100644 index 0000000..a001e3f --- /dev/null +++ b/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java @@ -0,0 +1,123 @@ +package org.simdjson; + +import com.alibaba.fastjson2.JSON; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.plokhotnyuk.jsoniter_scala.core.ReaderConfig$; +import com.github.plokhotnyuk.jsoniter_scala.core.package$; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.simdjson.SimdJsonPaddingUtil.padded; + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +public class SchemaBasedParseAndSelectBenchmark { + + private final SimdJsonParser simdJsonParser = new SimdJsonParser(); + private final ObjectMapper objectMapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + private byte[] buffer; + private byte[] bufferPadded; + + @Setup(Level.Trial) + public void setup() throws IOException { + try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { + buffer = is.readAllBytes(); + bufferPadded = padded(buffer); + } + System.out.println("VectorSpecies = " + 
StructuralIndexer.BYTE_SPECIES); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_simdjson() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = simdJsonParser.parse(buffer, buffer.length, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_simdjsonPadded() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = simdJsonParser.parse(bufferPadded, buffer.length, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_jackson() throws IOException { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = objectMapper.readValue(buffer, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_jsoniter_scala() { + Twitter twitter = package$.MODULE$.readFromArray(buffer, ReaderConfig$.MODULE$, Twitter$.MODULE$.codec()); + Set defaultUsers = new HashSet<>(); + for (Status tweet: twitter.statuses()) { + User user = tweet.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_fastjson() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = JSON.parseObject(buffer, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if 
(user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + record SimdJsonUser(boolean default_profile, String screen_name) { + + } + + record SimdJsonStatus(SimdJsonUser user) { + + } + + record SimdJsonTwitter(List statuses) { + + } +} diff --git a/src/main/java/org/simdjson/BitIndexes.java b/src/main/java/org/simdjson/BitIndexes.java index 4ab1dde..59c0dc3 100644 --- a/src/main/java/org/simdjson/BitIndexes.java +++ b/src/main/java/org/simdjson/BitIndexes.java @@ -44,11 +44,26 @@ private long clearLowestBit(long bits) { return bits & (bits - 1); } - int advance() { + void advance() { + readIdx++; + } + + int getAndAdvance() { + assert readIdx <= writeIdx; return indexes[readIdx++]; } + int getLast() { + return indexes[writeIdx - 1]; + } + + int advanceAndGet() { + assert readIdx + 1 <= writeIdx; + return indexes[++readIdx]; + } + int peek() { + assert readIdx <= writeIdx; return indexes[readIdx]; } @@ -60,6 +75,26 @@ boolean isEnd() { return writeIdx == readIdx; } + boolean isPastEnd() { + return readIdx > writeIdx; + } + + void finish() { + // If we go past the end of the detected structural indexes, it means we are dealing with an invalid JSON. + // Thus, we need to stop processing immediately and throw an exception. To avoid checking after every increment + // of readIdx whether this has happened, we jump to the first structural element. This should produce the + // desired outcome, i.e., an iterator should detect invalid JSON. To understand how this works, let's first + // exclude primitive values (numbers, strings, booleans, nulls) from the scope of possible JSON documents. We + // can do this because, when these values are parsed, the length of the input buffer is verified, ensuring we + // never go past its end. Therefore, we can focus solely on objects and arrays. 
Since we always check that if + // the first character is '{', the last one must be '}', and if the first character is '[', the last one must + // be ']', we know that if we've reached beyond the buffer without crashing, the input is either '{...}' or '[...]'. + // Thus, if we jump to the first structural element, we will generate either '{...}{' or '[...]['. Both of these + // are invalid sequences and will be detected by the iterator, which will then stop processing and throw an + // exception informing about the invalid JSON. + indexes[writeIdx] = 0; + } + void reset() { writeIdx = 0; readIdx = 0; diff --git a/src/main/java/org/simdjson/ClassResolver.java b/src/main/java/org/simdjson/ClassResolver.java new file mode 100644 index 0000000..613aa2f --- /dev/null +++ b/src/main/java/org/simdjson/ClassResolver.java @@ -0,0 +1,24 @@ +package org.simdjson; + +import java.lang.reflect.Type; +import java.util.HashMap; +import java.util.Map; + +class ClassResolver { + + private final Map classCache = new HashMap<>(); + + ResolvedClass resolveClass(Type type) { + ResolvedClass resolvedClass = classCache.get(type); + if (resolvedClass != null) { + return resolvedClass; + } + resolvedClass = new ResolvedClass(type, this); + classCache.put(type, resolvedClass); + return resolvedClass; + } + + void reset() { + classCache.clear(); + } +} diff --git a/src/main/java/org/simdjson/ConstructorArgument.java b/src/main/java/org/simdjson/ConstructorArgument.java new file mode 100644 index 0000000..05a68a9 --- /dev/null +++ b/src/main/java/org/simdjson/ConstructorArgument.java @@ -0,0 +1,4 @@ +package org.simdjson; + +record ConstructorArgument(int idx, ResolvedClass resolvedClass) { +} diff --git a/src/main/java/org/simdjson/ConstructorArgumentsMap.java b/src/main/java/org/simdjson/ConstructorArgumentsMap.java new file mode 100644 index 0000000..259c299 --- /dev/null +++ b/src/main/java/org/simdjson/ConstructorArgumentsMap.java @@ -0,0 +1,91 @@ +package org.simdjson; + +import 
java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.util.Arrays; + +import static java.lang.invoke.MethodHandles.byteArrayViewVarHandle; + +class ConstructorArgumentsMap { + + private static final VarHandle VAR_HANDLE_LONG = byteArrayViewVarHandle(Long.TYPE.arrayType(), ByteOrder.nativeOrder()); + private static final VarHandle VAR_HANDLE_INT = byteArrayViewVarHandle(Integer.TYPE.arrayType(), ByteOrder.nativeOrder()); + // Large prime number. This one is taken from https://vanilla-java.github.io/2018/08/15/Looking-at-randomness-and-performance-for-hash-codes.html + private static final long M2 = 0x7a646e4d; + + private final int argumentCount; + private final int capacity; + private final int moduloMask; + private final byte[][] keys; + private final ConstructorArgument[] arguments; + + ConstructorArgumentsMap(int argumentCount) { + this.argumentCount = argumentCount; + this.capacity = ceilingPowerOfTwo(argumentCount); + this.moduloMask = capacity - 1; + this.arguments = new ConstructorArgument[capacity]; + this.keys = new byte[capacity][]; + } + + private static int ceilingPowerOfTwo(int argumentCount) { + // We don't need to check if argumentCount is greater than 2^30 because, in Java, the limit for method arguments + // is equal to 255 (https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.3.3). 
+        return 1 << -Integer.numberOfLeadingZeros(argumentCount - 1);
+    }
+
+    int getArgumentCount() {
+        return argumentCount;
+    }
+
+    void put(byte[] fieldName, ConstructorArgument argument) {
+        int place = findPlace(fieldName, fieldName.length);
+
+        while (keys[place] != null) {
+            place = (place + 1) & moduloMask;
+        }
+        arguments[place] = argument;
+        keys[place] = fieldName;
+    }
+
+    ConstructorArgument get(byte[] buffer, int len) { // returns null when the first len bytes of buffer match no key
+        int place = findPlace(buffer, len);
+        for (int i = 0; i < capacity && keys[place] != null; i++) { // null key = empty slot: put() never skips one, so stop
+            byte[] key = keys[place];
+            if (Arrays.equals(key, 0, key.length, buffer, 0, len)) {
+                return arguments[place];
+            }
+            place = (place + 1) & moduloMask;
+        }
+        return null; // every slot probed, or an empty slot reached: the name was never inserted
+    }
+
+    private int findPlace(byte[] buffer, int len) {
+        int hash = hash(buffer, len);
+        return hash & moduloMask;
+    }
+
+    private static int hash(byte[] data, int len) {
+        long h = 0;
+        int i = 0;
+        for (; i + 7 < len; i += 8) {
+            h = h * M2 + getLongFromArray(data, i);
+        }
+        if (i + 3 < len) {
+            h = h * M2 + getIntFromArray(data, i);
+            i += 4;
+        }
+        for (; i < len; i++) {
+            h = h * M2 + data[i];
+        }
+        h *= M2;
+        return (int) (h ^ h >>> 32);
+    }
+
+    private static int getIntFromArray(byte[] value, int i) {
+        return (int) VAR_HANDLE_INT.get(value, i);
+    }
+
+    private static long getLongFromArray(byte[] value, int i) {
+        return (long) VAR_HANDLE_LONG.get(value, i);
+    }
+}
diff --git a/src/main/java/org/simdjson/DoubleParser.java b/src/main/java/org/simdjson/DoubleParser.java
new file mode 100644
index 0000000..c5927f9
--- /dev/null
+++ b/src/main/java/org/simdjson/DoubleParser.java
@@ -0,0 +1,505 @@
+package org.simdjson;
+
+import static java.lang.Double.NEGATIVE_INFINITY;
+import static java.lang.Double.POSITIVE_INFINITY;
+import static java.lang.Double.longBitsToDouble;
+import static java.lang.Long.compareUnsigned;
+import static java.lang.Long.divideUnsigned;
+import static java.lang.Long.numberOfLeadingZeros;
+import static java.lang.Long.remainderUnsigned;
+import static
java.lang.Math.abs; +import static java.lang.Math.unsignedMultiplyHigh; +import static org.simdjson.ExponentParser.isExponentIndicator; +import static org.simdjson.NumberParserTables.MIN_POWER_OF_FIVE; +import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; +import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; +import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; + +class DoubleParser { + + // When parsing doubles, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate + // up to 19 digits (9999999999999999999 < 2^64). + private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; + // The smallest non-zero number representable in binary64 is 2^-1074, which is about 4.941 * 10^-324. + // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then + // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. To ensure w * 10^q < 2^-1074, q must satisfy the + // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-1074. This condition holds true whenever + // 18 + q < -324. Thus, for q < -342, we can reliably conclude that the number w * 10^q is smaller than 2^-1074, + // and this, in turn means the number is equal to zero. + private static final int FAST_PATH_MIN_POWER_OF_TEN = -342; + // We know that (1 - 2^-53) * 2^1024, which is about 1.798 * 10^308, is the largest number representable in binary64. + // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 308, the number is + // infinite. 
+ private static final int FAST_PATH_MAX_POWER_OF_TEN = 308; + private static final double[] POWERS_OF_TEN = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 + }; + private static final long MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY = (1L << 53) - 1; + private static final int IEEE64_EXPONENT_BIAS = 1023; + private static final int IEEE64_SIGN_BIT_INDEX = 63; + private static final int IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT = 52; + private static final int IEEE64_SIGNIFICAND_SIZE_IN_BITS = IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; + private static final int IEEE64_MAX_FINITE_NUMBER_EXPONENT = 1023; + private static final int IEEE64_MIN_FINITE_NUMBER_EXPONENT = -1022; + private static final int IEEE64_SUBNORMAL_EXPONENT = -1023; + // This is the upper limit for the count of decimal digits taken into account in the slow path. All digits exceeding + // this threshold are excluded. + private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; + private static final int SLOW_PATH_MAX_SHIFT = 60; + private static final byte[] SLOW_PATH_SHIFTS = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, + 33, 36, 39, 43, 46, 49, 53, 56, 59, + }; + private static final long MULTIPLICATION_MASK = 0xFFFFFFFFFFFFFFFFL >>> IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 3; + + private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); + private final ExponentParser exponentParser = new ExponentParser(); + + double parse(byte[] buffer, int offset, boolean negative, int digitsStartIdx, int digitCount, long digits, long exponent) { + if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { + return slowlyParseDouble(buffer, offset); + } else { + return computeDouble(negative, digits, exponent); + } + } + + private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { + if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { + return false; + } + int start = startDigitsIdx; + 
    /**
     * Converts a decimal number given as {@code significand10 * 10^exp10} into a {@code double}.
     *
     * <p>Two strategies are used. When {@code |exp10|} is smaller than the length of {@code POWERS_OF_TEN} and the
     * significand does not exceed {@code MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY}, the result is obtained with a single
     * floating-point multiplication or division. Otherwise, the Eisel-Lemire algorithm is applied: the significand is
     * normalized, multiplied by a tabulated power of five, and the binary significand and exponent are extracted and
     * rounded.
     *
     * @param negative      whether the result carries a minus sign
     * @param significand10 decimal significand; it is compared, shifted and multiplied as an unsigned 64-bit value
     * @param exp10         decimal exponent
     * @return the converted value; a signed zero when the significand is zero or the exponent is below the supported
     *         range, and a signed infinity when the exponent or the resulting binary exponent is above it
     */
    private static double computeDouble(boolean negative, long significand10, long exp10) {
        if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY) <= 0) {
            // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/.
            double result = significand10;
            if (exp10 < 0) {
                result = result / POWERS_OF_TEN[(int) -exp10];
            } else {
                result = result * POWERS_OF_TEN[(int) exp10];
            }
            return negative ? -result : result;
        }

        // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in
        // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408).

        if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) {
            return zero(negative);
        } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) {
            return infinity(negative);
        }

        // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64).
        int lz = numberOfLeadingZeros(significand10);
        significand10 <<= lz;

        // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to
        // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m.
        // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized
        // to minimize necessary operations while ensuring precise results. For more information, refer to the
        // aforementioned paper.
        int powersOfFiveTableIndex = 2 * (int) (exp10 - MIN_POWER_OF_FIVE);
        long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]);
        long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex];
        if ((upper & MULTIPLICATION_MASK) == MULTIPLICATION_MASK) {
            long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]);
            lower += secondUpper;
            // An unsigned wrap-around of 'lower' means a carry into the upper word.
            if (compareUnsigned(secondUpper, lower) > 0) {
                upper++;
            }
            // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback"
            // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate,
            // and more computation is not needed.
        }

        // Here, we extract the binary significand from the product. Although in binary64 the significand has 53 bits,
        // we extract 54 bits to use the least significant bit for rounding. Since both the decimal significand and the
        // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product
        // has either 0 or 1 leading zeros. As a result, we need to perform a right shift of either 9 or 10 bits.
        long upperBit = upper >>> 63;
        long upperShift = upperBit + 9;
        long significand2 = upper >>> upperShift;

        // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of
        // 10^q. Our goal is to compute f0 and f1 such that:
        // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0
        // - when q < 0: 10^q = (2^(f1 - q) / 5^-q) * 2^f1
        // Both (5^q / 2^(f0 - q)) and (2^(f1 - q) / 5^-q) must fall within the range of [1, 2).
        // It turns out that these conditions are met when:
        // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16.
        // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16.
        // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16.
        //
        // Furthermore, we need to factor in the following normalizations we've performed:
        // - shifting the decimal significand left bitwise
        // - shifting the binary significand right bitwise if the most significant bit of the product was 1
        // Therefore, we add (63 - lz + upperBit) to the exponent.
        long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit;
        if (exp2 < IEEE64_MIN_FINITE_NUMBER_EXPONENT) {
            // In the next step, we right-shift the binary significand by the difference between the minimum exponent
            // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive.
            // Thus, we need to handle the case when the distance is >= 64 separately and always return zero.
            if (exp2 <= IEEE64_MIN_FINITE_NUMBER_EXPONENT - 64) {
                return zero(negative);
            }

            // In this branch, it is likely that we are handling a subnormal number. Therefore, we adjust the significand
            // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE64_EXPONENT_BIAS)) / 2^52.
            significand2 >>= 1 - IEEE64_EXPONENT_BIAS - exp2;
            // Round up if the significand is odd and remove the least significant bit that we've left for rounding.
            significand2 += significand2 & 1;
            significand2 >>= 1;

            // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after
            // rounding up. For example, when we are parsing 2.2250738585072013e-308 before rounding and removing the
            // least significant bit significand2 = 0x3fffffffffffff and exp2 = -1023. After rounding, we get
            // significand2 = 0x10000000000000, which is the significand of the smallest normal number.
            exp2 = (significand2 < (1L << 52)) ? IEEE64_SUBNORMAL_EXPONENT : IEEE64_MIN_FINITE_NUMBER_EXPONENT;
            return toDouble(negative, significand2, exp2);
        }

        // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway
        // between two integers. To understand the rationale behind the conditions used to identify this case, refer to
        // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second".
        if (exp10 >= -4 && exp10 <= 23) {
            if ((significand2 << upperShift == upper) && (compareUnsigned(lower, 1) <= 0)) {
                if ((significand2 & 3) == 1) {
                    // Clearing the rounding bit makes the rounding step below leave the (even) significand as is.
                    significand2 &= ~1;
                }
            }
        }

        // Round up if the significand is odd and remove the least significant bit that we've left for rounding.
        significand2 += significand2 & 1;
        significand2 >>= 1;

        if (significand2 == (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) {
            // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand
            // by 2 and update the exponent accordingly.
            significand2 >>= 1;
            exp2++;
        }

        if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) {
            return infinity(negative);
        }
        return toDouble(negative, significand2, exp2);
    }
    // The following parser is based on the idea described in
    // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in
    // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp
    //
    // It reads the textual form of the number again, starting at buffer[offset] (an optional '-', the integer
    // digits, an optional fraction and an optional exponent), stores up to SLOW_PATH_MAX_DIGIT_COUNT decimal digits
    // in the reusable SlowPathDecimal instance held by this parser, and then repeatedly multiplies or divides that
    // decimal by powers of two until the binary significand and exponent can be read off.
    //
    // Returns a signed zero or a signed infinity when the decimal exponent is outside of the range handled below.
    //
    // NOTE(review): the method does not validate the input and relies on a non-digit byte following the number in
    // the buffer - presumably guaranteed by the caller; confirm.
    private double slowlyParseDouble(byte[] buffer, int offset) {
        final SlowPathDecimal decimal = slowPathDecimal;
        decimal.reset();

        decimal.negative = buffer[offset] == '-';
        int currentIdx = decimal.negative ? offset + 1 : offset;
        long exp10 = 0;

        currentIdx = skipZeros(buffer, currentIdx);
        currentIdx = parseDigits(buffer, decimal, currentIdx);
        if (buffer[currentIdx] == '.') {
            currentIdx++;
            int firstIdxAfterPeriod = currentIdx;
            // Zeros directly after the period are insignificant only if no significant digit has been seen so far.
            if (decimal.digitCount == 0) {
                currentIdx = skipZeros(buffer, currentIdx);
            }
            currentIdx = parseDigits(buffer, decimal, currentIdx);
            // Each digit after the period (including the skipped zeros) lowers the decimal exponent by one.
            exp10 = firstIdxAfterPeriod - currentIdx;
        }

        int currentIdxMovingBackwards = currentIdx - 1;
        int trailingZeros = 0;
        // Here, we also skip the period to handle cases like 100000000000000000000.000000
        while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') {
            if (buffer[currentIdxMovingBackwards] == '0') {
                trailingZeros++;
            }
            currentIdxMovingBackwards--;
        }
        // After this addition the decimal point is placed in front of the first stored digit.
        exp10 += decimal.digitCount;
        decimal.digitCount -= trailingZeros;

        if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) {
            decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT;
            decimal.truncated = true;
        }

        if (isExponentIndicator(buffer[currentIdx])) {
            currentIdx++;
            exp10 = exponentParser.parse(buffer, currentIdx, exp10).exponent();
        }

        // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1.
        if (exp10 <= -324) {
            // We know that -1e-324 < w * 10^exp10 < 1e-324. In binary64 -1e-324 = -0.0 and 1e-324 = +0.0, so we can
            // safely return +/-0.0.
            return zero(decimal.negative);
        } else if (exp10 >= 310) {
            // We know that either w * 10^exp10 <= -0.1e310 or w * 10^exp10 >= 0.1e310.
            // In binary64 -0.1e310 = -inf and 0.1e310 = +inf, so we can safely return +/-inf.
            return infinity(decimal.negative);
        }

        decimal.exp10 = (int) exp10;
        int exp2 = 0;

        // We start the following loop with the decimal in the form of w * 10^exp10. After a series of
        // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10',
        // where exp10' is <= 0. Resultantly, w' * 10^exp10' is in the range of [0, 1).
        while (decimal.exp10 > 0) {
            int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10);
            decimal.shiftRight(shift);
            exp2 += shift;
        }

        // Now, we are left-shifting to get to the point where w'' * 10^exp10'' is within the range of [1/2, 1).
        while (decimal.exp10 <= 0) {
            int shift;
            if (decimal.exp10 == 0) {
                if (decimal.digits[0] >= 5) {
                    break;
                }
                shift = (decimal.digits[0] < 2) ? 2 : 1;
            } else {
                shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10);
            }
            decimal.shiftLeft(shift);
            exp2 -= shift;
        }

        // Here, w'' * 10^exp10'' falls within the range of [1/2, 1). In binary64, the significand must be within the
        // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the decimal
        // is represented as w'' * 10^exp10'' * 2^exp2, where w'' * 10^exp10'' is in the range of [1, 2).
        exp2--;

        // Raise the binary exponent to the smallest one supported by binary64, shifting the decimal right in steps
        // of at most SLOW_PATH_MAX_SHIFT bits.
        while (IEEE64_MIN_FINITE_NUMBER_EXPONENT > exp2) {
            int n = IEEE64_MIN_FINITE_NUMBER_EXPONENT - exp2;
            if (n > SLOW_PATH_MAX_SHIFT) {
                n = SLOW_PATH_MAX_SHIFT;
            }
            decimal.shiftRight(n);
            exp2 += n;
        }

        // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^52, 2^53). Hence,
        // we perform the following multiplication. If, after this step, the significand is less than 2^52, we have a
        // subnormal number, which we will address later.
        decimal.shiftLeft(IEEE64_SIGNIFICAND_SIZE_IN_BITS);

        long significand2 = decimal.computeSignificand();
        if (significand2 >= (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) {
            // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand
            // by 2 and update the exponent accordingly.
            significand2 >>= 1;
            exp2++;
        }

        // A significand without the implicit bit set denotes a subnormal number.
        if (significand2 < (1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT)) {
            exp2 = IEEE64_SUBNORMAL_EXPONENT;
        }
        if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) {
            return infinity(decimal.negative);
        }
        return toDouble(decimal.negative, significand2, exp2);
    }
digits[i] : 0); + } + boolean roundUp = false; + if (exp10 < digitCount) { + roundUp = digits[exp10] >= 5; + if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { + // If the digits haven't been truncated, then we are exactly halfway between two integers. In such + // cases, we round to even, otherwise we round up. + roundUp = truncated || (significand & 1) == 1; + } + } + return roundUp ? ++significand : significand; + } + + void shiftLeft(int shift) { + if (digitCount == 0) { + return; + } + + int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); + int readIndex = digitCount - 1; + int writeIndex = digitCount - 1 + numberOfAdditionalDigits; + long n = 0; + + while (readIndex >= 0) { + n += (long) digits[readIndex] << shift; + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + readIndex--; + } + + while (compareUnsigned(n, 0) > 0) { + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + } + digitCount += numberOfAdditionalDigits; + if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + } + exp10 += numberOfAdditionalDigits; + trimTrailingZeros(); + } + + // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts + private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { + int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; + int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; + int newDigitCount = a >> 11; + int pow5OffsetA = 0x7FF & a; + int pow5OffsetB = 0x7FF & b; + + int n = pow5OffsetB - pow5OffsetA; + for (int i = 0; 
i < n; i++) { + if (i >= digitCount) { + return newDigitCount - 1; + } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount - 1; + } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount; + } + } + return newDigitCount; + } + + void shiftRight(int shift) { + int readIndex = 0; + int writeIndex = 0; + long n = 0; + + while ((n >>> shift) == 0) { + if (readIndex < digitCount) { + n = (10 * n) + digits[readIndex++]; + } else if (n == 0) { + return; + } else { + while ((n >>> shift) == 0) { + n = 10 * n; + readIndex++; + } + break; + } + } + exp10 -= (readIndex - 1); + long mask = (1L << shift) - 1; + while (readIndex < digitCount) { + byte newDigit = (byte) (n >>> shift); + n = (10 * (n & mask)) + digits[readIndex++]; + digits[writeIndex++] = newDigit; + } + while (compareUnsigned(n, 0) > 0) { + byte newDigit = (byte) (n >>> shift); + n = 10 * (n & mask); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex++] = newDigit; + } else if (newDigit > 0) { + truncated = true; + } + } + digitCount = writeIndex; + trimTrailingZeros(); + } + + private void trimTrailingZeros() { + while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { + digitCount--; + } + } + + private void reset() { + digitCount = 0; + exp10 = 0; + truncated = false; + } + } +} diff --git a/src/main/java/org/simdjson/ExponentParser.java b/src/main/java/org/simdjson/ExponentParser.java new file mode 100644 index 0000000..be07a37 --- /dev/null +++ b/src/main/java/org/simdjson/ExponentParser.java @@ -0,0 +1,94 @@ +package org.simdjson; + +class ExponentParser { + + private final ExponentParsingResult result = new ExponentParsingResult(); + + static boolean isExponentIndicator(byte b) { + return 'e' == b || 'E' == b; + } + + ExponentParsingResult parse(byte[] buffer, int currentIdx, long exponent) { + boolean negative = '-' == buffer[currentIdx]; + if (negative || '+' == buffer[currentIdx]) { + currentIdx++; + } + int 
exponentStartIdx = currentIdx; + + long parsedExponent = 0; + byte digit = convertCharacterToDigit(buffer[currentIdx]); + while (digit >= 0 && digit <= 9) { + parsedExponent = 10 * parsedExponent + digit; + currentIdx++; + digit = convertCharacterToDigit(buffer[currentIdx]); + } + + if (exponentStartIdx == currentIdx) { + throw new JsonParsingException("Invalid number. Exponent indicator has to be followed by a digit."); + } + // Long.MAX_VALUE = 9223372036854775807 (19 digits). Therefore, any number with <= 18 digits can be safely + // stored in a long without causing an overflow. + int maxDigitCountLongCanAccommodate = 18; + if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { + // Potentially, we have an overflow here. We try to skip leading zeros. + while (buffer[exponentStartIdx] == '0') { + exponentStartIdx++; + } + if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { + // We still have more digits than a long can safely accommodate. + // + // The largest finite number that can be represented in binary64 is (1-2^-53) * 2^1024, which is about + // 1.798e308, and the smallest non-zero number is 2^-1074, roughly 4.941e-324. So, we might, potentially, + // care only about numbers with explicit exponents falling within the range of [-324, 308], and return + // either zero or infinity for everything outside of this range.However, we have to take into account + // the fractional part of the parsed number. This part can potentially cancel out the value of the + // explicit exponent. For example, 1000e-325 (1 * 10^3 * 10^-325 = 1 * 10^-322) is not equal to zero + // despite the explicit exponent being less than -324. + // + // Let's consider a scenario where the explicit exponent is greater than 999999999999999999. As long as + // the fractional part has <= 999999999999999690 digits, it doesn't matter whether we take + // 999999999999999999 or its actual value as the explicit exponent. 
This is due to the fact that the + // parsed number is infinite anyway (w * 10^-q * 10^999999999999999999 > (1-2^-53) * 2^1024, 0 < w < 10, + // 0 <= q <= 999999999999999690). Similarly, in a scenario where the explicit exponent is less than + // -999999999999999999, as long as the fractional part has <= 999999999999999674 digits, we can safely + // take 999999999999999999 as the explicit exponent, given that the parsed number is zero anyway + // (w * 10^q * 10^-999999999999999999 < 2^-1074, 0 < w < 10, 0 <= q <= 999999999999999674) + // + // Note that if the fractional part had 999999999999999674 digits, the JSON size would need to be + // 999999999999999674 bytes, which is approximately ~888 PiB. Consequently, it's reasonable to assume + // that the fractional part contains no more than 999999999999999674 digits. + parsedExponent = 999999999999999999L; + } + } + // Note that we don't check if 'exponent' has overflowed after the following addition. This is because we + // know that the parsed exponent falls within the range of [-999999999999999999, 999999999999999999]. We also + // assume that 'exponent' before the addition is within the range of [-9223372036854775808, 9223372036854775807]. + // This assumption should always be valid as the value of 'exponent' is constrained by the size of the JSON input. + exponent += negative ? 
-parsedExponent : parsedExponent; + return result.of(exponent, currentIdx); + } + + private static byte convertCharacterToDigit(byte b) { + return (byte) (b - '0'); + } + + static class ExponentParsingResult { + + private long exponent; + private int currentIdx; + + ExponentParsingResult of(long exponent, int currentIdx) { + this.exponent = exponent; + this.currentIdx = currentIdx; + return this; + } + + long exponent() { + return exponent; + } + + int currentIdx() { + return currentIdx; + } + } +} diff --git a/src/main/java/org/simdjson/FloatParser.java b/src/main/java/org/simdjson/FloatParser.java new file mode 100644 index 0000000..0d3ebfa --- /dev/null +++ b/src/main/java/org/simdjson/FloatParser.java @@ -0,0 +1,504 @@ +package org.simdjson; + +import static java.lang.Float.NEGATIVE_INFINITY; +import static java.lang.Float.POSITIVE_INFINITY; +import static java.lang.Long.compareUnsigned; +import static java.lang.Long.divideUnsigned; +import static java.lang.Long.numberOfLeadingZeros; +import static java.lang.Long.remainderUnsigned; +import static java.lang.Math.abs; +import static java.lang.Math.unsignedMultiplyHigh; +import static org.simdjson.ExponentParser.isExponentIndicator; +import static org.simdjson.NumberParserTables.MIN_POWER_OF_FIVE; +import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; +import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; +import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; + +class FloatParser { + + // When parsing floats, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate + // up to 19 digits (9999999999999999999 < 2^64). + private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; + // The smallest non-zero number representable in binary32 is 2^-149, which is about 1.4 * 10^-45. 
+ // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then + // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. To ensure w * 10^q < 2^-149, q must satisfy the + // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-149. This condition holds true whenever + // 18 + q < -45. Thus, for q < -63, we can reliably conclude that the number w * 10^q is smaller than 2^-149, + // and this, in turn means the number is equal to zero. + private static final int FAST_PATH_MIN_POWER_OF_TEN = -63; // todo: https://github.com/fastfloat/fast_float/pull/167 + // We know that (1 - 2^-24) * 2^128, which is about 3.4 * 10^38, is the largest number representable in binary64. + // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 38, the number is + // infinite. + private static final int FAST_PATH_MAX_POWER_OF_TEN = 38; + private static final float[] POWERS_OF_TEN = { + 1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f + }; + private static final long MAX_LONG_REPRESENTED_AS_FLOAT_EXACTLY = (1L << 24) - 1; + private static final int IEEE32_EXPONENT_BIAS = 127; + private static final int IEEE32_SIGN_BIT_INDEX = 31; + private static final int IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT = 23; + private static final int IEEE32_SIGNIFICAND_SIZE_IN_BITS = IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; + private static final int IEEE32_MAX_FINITE_NUMBER_EXPONENT = 127; + private static final int IEEE32_MIN_FINITE_NUMBER_EXPONENT = -126; + private static final int IEEE32_SUBNORMAL_EXPONENT = -127; + // This is the upper limit for the count of decimal digits taken into account in the slow path. All digits exceeding + // this threshold are excluded. 
+ private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; + private static final int SLOW_PATH_MAX_SHIFT = 60; + private static final byte[] SLOW_PATH_SHIFTS = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, + 33, 36, 39, 43, 46, 49, 53, 56, 59, + }; + private static final long MULTIPLICATION_MASK = 0xFFFFFFFFFFFFFFFFL >>> IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT + 3; + + private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); + private final ExponentParser exponentParser = new ExponentParser(); + + float parse(byte[] buffer, int offset, boolean negative, int digitsStartIdx, int digitCount, long digits, long exponent) { + if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { + return slowlyParseFloat(buffer, offset); + } else { + return computeFloat(negative, digits, exponent); + } + } + + private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { + if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { + return false; + } + int start = startDigitsIdx; + while (buffer[start] == '0' || buffer[start] == '.') { + start++; + } + int significantDigitCount = digitCount - (start - startDigitsIdx); + return significantDigitCount > FAST_PATH_MAX_DIGIT_COUNT; + } + + private static float computeFloat(boolean negative, long significand10, long exp10) { + if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_FLOAT_EXACTLY) <= 0) { + // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/. + float result = significand10; + if (exp10 < 0) { + result = result / POWERS_OF_TEN[(int) -exp10]; + } else { + result = result * POWERS_OF_TEN[(int) exp10]; + } + return negative ? -result : result; + } + + // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in + // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408). 
+ + if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) { + return zero(negative); + } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) { + return infinity(negative); + } + + // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64). + int lz = numberOfLeadingZeros(significand10); + significand10 <<= lz; + + // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to + // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m. + // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized + // to minimize necessary operations while ensuring precise results. For more information, refer to the + // aforementioned paper. + int powersOfFiveTableIndex = 2 * (int) (exp10 - MIN_POWER_OF_FIVE); + long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]); + long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex]; + if ((upper & MULTIPLICATION_MASK) == MULTIPLICATION_MASK) { + long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]); + lower += secondUpper; + if (compareUnsigned(secondUpper, lower) > 0) { + upper++; + } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback" + // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate, + // and more computation is not needed. + } + + // Here, we extract the binary significand from the product. Although in binary32 the significand has 24 bits, + // we extract 25 bits to use the least significant bit for rounding. Since both the decimal significand and the + // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product + // has either 0 or 1 leading zeros. 
As a result, we need to perform a right shift of either 38 or 39 bits. + long upperBit = upper >>> 63; + long upperShift = upperBit + 38; + long significand2 = upper >>> upperShift; + + // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of + // 10^q. Our goal is to compute f0 and f1 such that: + // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0 + // - when q < 0: 10^q = (2^(f1 - q) / 5^-q) * 2^f1 + // Both (5^q / 2^(f0 - q)) and (2^(f1 - q) / 5^-q) must fall within the range of [1, 2). + // It turns out that these conditions are met when: + // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16. + // + // Furthermore, we need to factor in the following normalizations we've performed: + // - shifting the decimal significand left bitwise + // - shifting the binary significand right bitwise if the most significant bit of the product was 1 + // Therefore, we add (63 - lz + upperBit) to the exponent. + long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit; + if (exp2 < IEEE32_MIN_FINITE_NUMBER_EXPONENT) { + // In the next step, we right-shift the binary significand by the difference between the minimum exponent + // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive. + // Thus, we need to handle the case when the distance is >= 64 separately and always return zero. + if (exp2 <= IEEE32_MIN_FINITE_NUMBER_EXPONENT - 64) { + return zero(negative); + } + + // In this branch, it is likely that we are handling a subnormal number. 
Therefore, we adjust the significand + // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE32_EXPONENT_BIAS)) / 2^23. + significand2 >>= 1 - IEEE32_EXPONENT_BIAS - exp2; + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + + significand2 += significand2 & 1; + significand2 >>= 1; + + // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after + // rounding up. For example, when we are parsing 1.17549433e-38 before rounding and removing the least + // significant bit significand2 = 0x1FFFFFF and exp2 = -127. After rounding, we get significand2 = 0x800000, + // which is the significand of the smallest normal number. + exp2 = (significand2 < (1L << 23)) ? IEEE32_SUBNORMAL_EXPONENT : IEEE32_MIN_FINITE_NUMBER_EXPONENT; + return toFloat(negative, (int) significand2, (int) exp2); + } + + // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway + // between two integers. To understand the rationale behind the conditions used to identify this case, refer to + // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second". + if (exp10 >= -17 && exp10 <= 10) { + if ((significand2 << upperShift == upper) && (compareUnsigned(lower, 1) <= 0)) { + if ((significand2 & 3) == 1) { + significand2 &= ~1; + } + } + } + + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + significand2 += significand2 & 1; + significand2 >>= 1; + + if (significand2 == (1L << IEEE32_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly. 
+ significand2 >>= 1; + exp2++; + } + + if (exp2 > IEEE32_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(negative); + } + return toFloat(negative, (int) significand2, (int) exp2); + } + + private static float toFloat(boolean negative, int significand2, int exp2) { + int bits = significand2; + bits &= ~(1 << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT); // clear the implicit bit + bits |= (exp2 + IEEE32_EXPONENT_BIAS) << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT; + bits = negative ? (bits | (1 << IEEE32_SIGN_BIT_INDEX)) : bits; + return Float.intBitsToFloat(bits); + } + + private static float infinity(boolean negative) { + return negative ? NEGATIVE_INFINITY : POSITIVE_INFINITY; + } + + private static float zero(boolean negative) { + return negative ? -0.0f : 0.0f; + } + + // The following parser is based on the idea described in + // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in + // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp + private float slowlyParseFloat(byte[] buffer, int offset) { + final SlowPathDecimal decimal = slowPathDecimal; + decimal.reset(); + + decimal.negative = buffer[offset] == '-'; + int currentIdx = decimal.negative ? 
offset + 1 : offset; + long exp10 = 0; + + currentIdx = skipZeros(buffer, currentIdx); + currentIdx = parseDigits(buffer, decimal, currentIdx); + if (buffer[currentIdx] == '.') { + currentIdx++; + int firstIdxAfterPeriod = currentIdx; + if (decimal.digitCount == 0) { + currentIdx = skipZeros(buffer, currentIdx); + } + currentIdx = parseDigits(buffer, decimal, currentIdx); + exp10 = firstIdxAfterPeriod - currentIdx; + } + + int currentIdxMovingBackwards = currentIdx - 1; + int trailingZeros = 0; + // Here, we also skip the period to handle cases like 100000000000000000000.000000 + while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') { + if (buffer[currentIdxMovingBackwards] == '0') { + trailingZeros++; + } + currentIdxMovingBackwards--; + } + exp10 += decimal.digitCount; + decimal.digitCount -= trailingZeros; + + if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + decimal.truncated = true; + } + + if (isExponentIndicator(buffer[currentIdx])) { + currentIdx++; + exp10 = exponentParser.parse(buffer, currentIdx, exp10).exponent(); + } + + // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1. + if (exp10 <= -46) { + // We know that -1e-46 < w * 10^exp10 < 1e-46. In binary32 -1e-46 = -0.0 and 1e-46 = +0.0, so we can + // safely return +/-0.0. + return zero(decimal.negative); + } else if (exp10 >= 40) { + // We know that either w * 10^exp10 <= -0.1e40 or w * 10^exp10 >= 0.1e40. + // In binary32 -0.1e40 = -inf and 0.1e40 = +inf, so we can safely return +/-inf. + return infinity(decimal.negative); + } + + decimal.exp10 = (int) exp10; + int exp2 = 0; + + // We start the following loop with the decimal in the form of w * 10^exp10. After a series of + // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10', + // where exp10' is <= 0. 
Resultantly, w' * 10^exp10' is in the range of [0, 1). + while (decimal.exp10 > 0) { + int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10); + decimal.shiftRight(shift); + exp2 += shift; + } + + // Now, we are left-shifting to get to the point where w'' * 10^exp10'' is within the range of [1/2, 1). + while (decimal.exp10 <= 0) { + int shift; + if (decimal.exp10 == 0) { + if (decimal.digits[0] >= 5) { + break; + } + shift = (decimal.digits[0] < 2) ? 2 : 1; + } else { + shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10); + } + decimal.shiftLeft(shift); + exp2 -= shift; + } + + // Here, w'' * 10^exp10'' falls within the range of [1/2, 1). In binary32, the significand must be within the + // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the number + // is represented as (2 * w'' * 10^exp10'') * 2^exp2, where 2 * w'' * 10^exp10'' is in the range of [1, 2). + exp2--; + + while (IEEE32_MIN_FINITE_NUMBER_EXPONENT > exp2) { + int n = IEEE32_MIN_FINITE_NUMBER_EXPONENT - exp2; + if (n > SLOW_PATH_MAX_SHIFT) { + n = SLOW_PATH_MAX_SHIFT; + } + decimal.shiftRight(n); + exp2 += n; + } + + // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^23, 2^24). Hence, + // we perform the following multiplication. If, after this step, the significand is less than 2^23, we have a + // subnormal number, which we will address later. + decimal.shiftLeft(IEEE32_SIGNIFICAND_SIZE_IN_BITS); + + long significand2 = decimal.computeSignificand(); + if (significand2 >= (1L << IEEE32_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly.
+ significand2 >>= 1; + exp2++; + } + + if (significand2 < (1L << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT)) { + exp2 = IEEE32_SUBNORMAL_EXPONENT; + } + if (exp2 > IEEE32_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(decimal.negative); + } + return toFloat(decimal.negative, (int) significand2, exp2); + } + + private static int resolveShiftDistanceBasedOnExponent10(int exp10) { + return (exp10 < SLOW_PATH_SHIFTS.length) ? SLOW_PATH_SHIFTS[exp10] : SLOW_PATH_MAX_SHIFT; + } + + private int skipZeros(byte[] buffer, int currentIdx) { + while (buffer[currentIdx] == '0') { + currentIdx++; + } + return currentIdx; + } + + private int parseDigits(byte[] buffer, SlowPathDecimal decimal, int currentIdx) { + while (isDigit(buffer[currentIdx])) { + if (decimal.digitCount < SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digits[decimal.digitCount] = convertCharacterToDigit(buffer[currentIdx]); + } + decimal.digitCount++; + currentIdx++; + } + return currentIdx; + } + + private static byte convertCharacterToDigit(byte b) { + return (byte) (b - '0'); + } + + private static boolean isDigit(byte b) { + return b >= '0' && b <= '9'; + } + + private static class SlowPathDecimal { + + final byte[] digits = new byte[SLOW_PATH_MAX_DIGIT_COUNT]; + int digitCount; + int exp10; + boolean truncated; + boolean negative; + + // Before calling this method we have to make sure that the significand is within the range of [0, 2^24 - 1]. + long computeSignificand() { + if (digitCount == 0 || exp10 < 0) { + return 0; + } + long significand = 0; + for (int i = 0; i < exp10; i++) { + significand = (10 * significand) + ((i < digitCount) ? digits[i] : 0); + } + boolean roundUp = false; + if (exp10 < digitCount) { + roundUp = digits[exp10] >= 5; + if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { + // If the digits haven't been truncated, then we are exactly halfway between two integers. In such + // cases, we round to even, otherwise we round up. 
+ roundUp = truncated || (significand & 1) == 1; + } + } + return roundUp ? ++significand : significand; + } + + void shiftLeft(int shift) { + if (digitCount == 0) { + return; + } + + int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); + int readIndex = digitCount - 1; + int writeIndex = digitCount - 1 + numberOfAdditionalDigits; + long n = 0; + + while (readIndex >= 0) { + n += (long) digits[readIndex] << shift; + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + readIndex--; + } + + while (compareUnsigned(n, 0) > 0) { + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + } + digitCount += numberOfAdditionalDigits; + if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + } + exp10 += numberOfAdditionalDigits; + trimTrailingZeros(); + } + + // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts + private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { + int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; + int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; + int newDigitCount = a >> 11; + int pow5OffsetA = 0x7FF & a; + int pow5OffsetB = 0x7FF & b; + + int n = pow5OffsetB - pow5OffsetA; + for (int i = 0; i < n; i++) { + if (i >= digitCount) { + return newDigitCount - 1; + } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount - 1; + } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount; + } + } + return newDigitCount; + } + + void shiftRight(int shift) { 
+ int readIndex = 0; + int writeIndex = 0; + long n = 0; + + while ((n >>> shift) == 0) { + if (readIndex < digitCount) { + n = (10 * n) + digits[readIndex++]; + } else if (n == 0) { + return; + } else { + while ((n >>> shift) == 0) { + n = 10 * n; + readIndex++; + } + break; + } + } + exp10 -= (readIndex - 1); + long mask = (1L << shift) - 1; + while (readIndex < digitCount) { + byte newDigit = (byte) (n >>> shift); + n = (10 * (n & mask)) + digits[readIndex++]; + digits[writeIndex++] = newDigit; + } + while (compareUnsigned(n, 0) > 0) { + byte newDigit = (byte) (n >>> shift); + n = 10 * (n & mask); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex++] = newDigit; + } else if (newDigit > 0) { + truncated = true; + } + } + digitCount = writeIndex; + trimTrailingZeros(); + } + + private void trimTrailingZeros() { + while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { + digitCount--; + } + } + + private void reset() { + digitCount = 0; + exp10 = 0; + truncated = false; + } + } +} diff --git a/src/main/java/org/simdjson/JsonIterator.java b/src/main/java/org/simdjson/JsonIterator.java index 3e96274..2ca2f26 100644 --- a/src/main/java/org/simdjson/JsonIterator.java +++ b/src/main/java/org/simdjson/JsonIterator.java @@ -17,21 +17,28 @@ class JsonIterator { private final BitIndexes indexer; private final boolean[] isArray; - JsonIterator(BitIndexes indexer, int capacity, int maxDepth, int padding) { + JsonIterator(BitIndexes indexer, byte[] stringBuffer, int capacity, int maxDepth, int padding) { this.indexer = indexer; this.isArray = new boolean[maxDepth]; - this.tapeBuilder = new TapeBuilder(capacity, maxDepth, padding); + this.tapeBuilder = new TapeBuilder(capacity, maxDepth, padding, stringBuffer); } JsonValue walkDocument(byte[] buffer, int len) { + if (indexer.isEnd()) { + throw new JsonParsingException("No structural element found."); + } + tapeBuilder.visitDocumentStart(); int depth = 0; int state; - int idx = indexer.advance(); + int idx = 
indexer.getAndAdvance(); switch (buffer[idx]) { case '{' -> { + if (buffer[indexer.getLast()] != '}') { + throw new JsonParsingException("Unclosed object. Missing '}' for starting '{'."); + } if (buffer[indexer.peek()] == '}') { indexer.advance(); tapeBuilder.visitEmptyObject(); @@ -41,6 +48,9 @@ JsonValue walkDocument(byte[] buffer, int len) { } } case '[' -> { + if (buffer[indexer.getLast()] != ']') { + throw new JsonParsingException("Unclosed array. Missing ']' for starting '['."); + } if (buffer[indexer.peek()] == ']') { indexer.advance(); tapeBuilder.visitEmptyArray(); @@ -55,13 +65,13 @@ JsonValue walkDocument(byte[] buffer, int len) { } } - while (indexer.hasNext()) { + while (state != DOCUMENT_END) { if (state == OBJECT_BEGIN) { depth++; isArray[depth] = false; tapeBuilder.visitObjectStart(depth); - int keyIdx = indexer.advance(); + int keyIdx = indexer.getAndAdvance(); if (buffer[keyIdx] != '"') { throw new JsonParsingException("Object does not start with a key"); } @@ -71,10 +81,10 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == OBJECT_FIELD) { - if (buffer[indexer.advance()] != ':') { + if (buffer[indexer.getAndAdvance()] != ':') { throw new JsonParsingException("Missing colon after key in object"); } - idx = indexer.advance(); + idx = indexer.getAndAdvance(); switch (buffer[idx]) { case '{' -> { if (buffer[indexer.peek()] == '}') { @@ -102,10 +112,10 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == OBJECT_CONTINUE) { - switch (buffer[indexer.advance()]) { + switch (buffer[indexer.getAndAdvance()]) { case ',' -> { tapeBuilder.incrementCount(depth); - int keyIdx = indexer.advance(); + int keyIdx = indexer.getAndAdvance(); if (buffer[keyIdx] != '"') { throw new JsonParsingException("Key string missing at beginning of field in object"); } @@ -140,7 +150,7 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == ARRAY_VALUE) { - idx = indexer.advance(); + idx = indexer.getAndAdvance(); switch (buffer[idx]) { 
case '{' -> { if (buffer[indexer.peek()] == '}') { @@ -168,7 +178,7 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == ARRAY_CONTINUE) { - switch (buffer[indexer.advance()]) { + switch (buffer[indexer.getAndAdvance()]) { case ',' -> { tapeBuilder.incrementCount(depth); state = ARRAY_VALUE; @@ -180,14 +190,11 @@ JsonValue walkDocument(byte[] buffer, int len) { default -> throw new JsonParsingException("Missing comma between array values"); } } + } + tapeBuilder.visitDocumentEnd(); - if (state == DOCUMENT_END) { - tapeBuilder.visitDocumentEnd(); - - if (!indexer.isEnd()) { - throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - } - } + if (!indexer.isEnd()) { + throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); } return tapeBuilder.createJsonValue(buffer); } diff --git a/src/main/java/org/simdjson/JsonParsingException.java b/src/main/java/org/simdjson/JsonParsingException.java index 5091eb2..2c924d8 100644 --- a/src/main/java/org/simdjson/JsonParsingException.java +++ b/src/main/java/org/simdjson/JsonParsingException.java @@ -5,4 +5,8 @@ public class JsonParsingException extends RuntimeException { JsonParsingException(String message) { super(message); } + + JsonParsingException(String message, Throwable throwable) { + super(message, throwable); + } } diff --git a/src/main/java/org/simdjson/JsonValue.java b/src/main/java/org/simdjson/JsonValue.java index 279f55a..6877519 100644 --- a/src/main/java/org/simdjson/JsonValue.java +++ b/src/main/java/org/simdjson/JsonValue.java @@ -3,6 +3,7 @@ import java.util.Arrays; import java.util.Iterator; import java.util.Map; +import java.util.NoSuchElementException; import static org.simdjson.Tape.DOUBLE; import static org.simdjson.Tape.FALSE_VALUE; @@ -157,9 +158,12 @@ public boolean hasNext() { @Override public JsonValue next() { - JsonValue value = new 
JsonValue(tape, idx, stringBuffer, buffer); - idx = tape.computeNextIndex(idx); - return value; + if (hasNext()) { + JsonValue value = new JsonValue(tape, idx, stringBuffer, buffer); + idx = tape.computeNextIndex(idx); + return value; + } + throw new NoSuchElementException("No more elements"); } } diff --git a/src/main/java/org/simdjson/NumberParser.java b/src/main/java/org/simdjson/NumberParser.java index 7c9c101..44fab42 100644 --- a/src/main/java/org/simdjson/NumberParser.java +++ b/src/main/java/org/simdjson/NumberParser.java @@ -1,73 +1,34 @@ package org.simdjson; -import static java.lang.Double.NEGATIVE_INFINITY; -import static java.lang.Double.POSITIVE_INFINITY; -import static java.lang.Double.longBitsToDouble; -import static java.lang.Long.compareUnsigned; -import static java.lang.Long.divideUnsigned; -import static java.lang.Long.numberOfLeadingZeros; -import static java.lang.Long.remainderUnsigned; -import static java.lang.Math.abs; -import static java.lang.Math.unsignedMultiplyHigh; +import org.simdjson.ExponentParser.ExponentParsingResult; + import static org.simdjson.CharacterUtils.isStructuralOrWhitespace; -import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; -import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; -import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; +import static org.simdjson.ExponentParser.isExponentIndicator; class NumberParser { - // When parsing doubles, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate - // up to 19 digits (9999999999999999999 < 2^64). - private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; - // The smallest non-zero number representable in binary64 is 2^-1074, which is about 4.941 * 10^-324. - // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then - // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. 
To ensure w * 10^q < 2^-1074, q must satisfy the - // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-1074. This condition holds true whenever - // 18 + q < -324. Thus, for q < -342, we can reliably conclude that the number w * 10^q is smaller than 2^-1074, - // and this, in turn means the number is equal to zero. - private static final int FAST_PATH_MIN_POWER_OF_TEN = -342; - // We know that (1 - 2^-53) * 2^1024, which is about 1.798 * 10^308, is the largest number representable in binary64. - // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 308, the number is - // infinite. - private static final int FAST_PATH_MAX_POWER_OF_TEN = 308; - private static final double[] POWERS_OF_TEN = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 - }; - private static final long MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY = 9007199254740991L; // 2^53 - 1 - private static final int IEEE64_EXPONENT_BIAS = 1023; - private static final int IEEE64_SIGN_BIT_INDEX = 63; - private static final int IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT = 52; - private static final int IEEE64_SIGNIFICAND_SIZE_IN_BITS = IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; - private static final int IEEE64_MAX_FINITE_NUMBER_EXPONENT = 1023; - private static final int IEEE64_MIN_FINITE_NUMBER_EXPONENT = -1022; - private static final int IEEE64_SUBNORMAL_EXPONENT = -1023; + private static final int BYTE_MAX_DIGIT_COUNT = 3; + private static final int BYTE_MAX_ABS_VALUE = 128; + private static final int SHORT_MAX_DIGIT_COUNT = 5; + private static final int SHORT_MAX_ABS_VALUE = 32768; + private static final int INT_MAX_DIGIT_COUNT = 10; + private static final long INT_MAX_ABS_VALUE = 2147483648L; private static final int LONG_MAX_DIGIT_COUNT = 19; - // This is the upper limit for the count of decimal digits taken into account in the slow path. 
All digits exceeding - // this threshold are excluded. - private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; - private static final int SLOW_PATH_MAX_SHIFT = 60; - private static final byte[] SLOW_PATH_SHIFTS = { - 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, - 33, 36, 39, 43, 46, 49, 53, 56, 59, - }; - - private final Tape tape; - private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); - - private int currentIdx; - - NumberParser(Tape tape) { - this.tape = tape; - } - void parseNumber(byte[] buffer, int offset) { + private final DigitsParsingResult digitsParsingResult = new DigitsParsingResult(); + private final ExponentParser exponentParser = new ExponentParser(); + private final DoubleParser doubleParser = new DoubleParser(); + private final FloatParser floatParser = new FloatParser(); + + void parseNumber(byte[] buffer, int offset, Tape tape) { boolean negative = buffer[offset] == '-'; - currentIdx = negative ? offset + 1 : offset; + int currentIdx = negative ? offset + 1 : offset; int digitsStartIdx = currentIdx; - long digits = parseDigits(buffer, 0); + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); int digitCount = currentIdx - digitsStartIdx; if (digitCount == 0) { throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); @@ -77,12 +38,14 @@ void parseNumber(byte[] buffer, int offset) { } long exponent = 0; - boolean isDouble = false; + boolean floatingPointNumber = false; if ('.' 
== buffer[currentIdx]) { - isDouble = true; + floatingPointNumber = true; currentIdx++; int firstIdxAfterPeriod = currentIdx; - digits = parseDigits(buffer, digits); + digitsParsingResult = parseDigits(buffer, currentIdx, digits); + digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); exponent = firstIdxAfterPeriod - currentIdx; if (exponent == 0) { throw new JsonParsingException("Invalid number. Decimal point has to be followed by a digit."); @@ -90,21 +53,18 @@ void parseNumber(byte[] buffer, int offset) { digitCount = currentIdx - digitsStartIdx; } if (isExponentIndicator(buffer[currentIdx])) { - isDouble = true; + floatingPointNumber = true; currentIdx++; - exponent = parseExponent(buffer, exponent); + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); } if (!isStructuralOrWhitespace(buffer[currentIdx])) { throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - if (isDouble) { - double d; - if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { - d = slowlyParseDouble(buffer, offset); - } else { - d = computeDouble(negative, digits, exponent); - } - tape.appendDouble(d); + if (floatingPointNumber) { + double value = doubleParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digits, exponent); + tape.appendDouble(value); } else { if (isOutOfLongRange(negative, digits, digitCount)) { throw new JsonParsingException("Number value is out of long range ([" + Long.MIN_VALUE + ", " + Long.MAX_VALUE + "])."); @@ -113,521 +73,291 @@ void parseNumber(byte[] buffer, int offset) { } } - private static boolean isOutOfLongRange(boolean negative, long digits, int digitCount) { - if (digitCount < LONG_MAX_DIGIT_COUNT) { + byte parseByte(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int 
currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); + } + + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + if (isOutOfByteRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of byte range ([" + Byte.MIN_VALUE + ", " + Byte.MAX_VALUE + "])."); + } + return (byte) (negative ? (~digits + 1) : digits); + } + + private static boolean isOutOfByteRange(boolean negative, long digits, int digitCount) { + if (digitCount < BYTE_MAX_DIGIT_COUNT) { return false; } - if (digitCount > LONG_MAX_DIGIT_COUNT) { + if (digitCount > BYTE_MAX_DIGIT_COUNT) { return true; } - if (negative && digits == Long.MIN_VALUE) { - // The maximum value we can store in a long is 9223372036854775807. When we try to store 9223372036854775808, - // a long wraps around, resulting in -9223372036854775808 (Long.MIN_VALUE). If the number we are parsing is - // negative, and we've attempted to store 9223372036854775808 in "digits", we can be sure that we are - // dealing with Long.MIN_VALUE, which obviously does not fall outside the acceptable range. 
- return false; + if (negative) { + return digits > BYTE_MAX_ABS_VALUE; } - return digits < 0; + return digits > Byte.MAX_VALUE; } - private static double computeDouble(boolean negative, long significand10, long exp10) { - if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY) <= 0) { - // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/. - double d = significand10; - if (exp10 < 0) { - d = d / POWERS_OF_TEN[(int) -exp10]; - } else { - d = d * POWERS_OF_TEN[(int) exp10]; - } - return negative ? -d : d; - } - - // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in - // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408). - - if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) { - return zero(negative); - } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) { - return infinity(negative); - } - - // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64). - int lz = numberOfLeadingZeros(significand10); - significand10 <<= lz; - - // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to - // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m. - // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized - // to minimize necessary operations while ensuring precise results. For further insight, refer to the - // aforementioned paper. 
- int powersOfFiveTableIndex = 2 * (int) (exp10 - FAST_PATH_MIN_POWER_OF_TEN); - long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]); - long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex]; - if ((upper & 0x1FF) == 0x1FF) { - long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]); - lower += secondUpper; - if (compareUnsigned(secondUpper, lower) > 0) { - upper++; - } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback" - // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate, - // and more computation is not needed. - } - - // Here, we extract the binary significand from the product. Although in binary64 the significand has 53 bits, - // we extract 54 bits to use the least significant bit for rounding. Since both the decimal significand and the - // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product - // has either 0 or 1 leading zeros. As a result, we need to perform a right shift of either 9 or 10 bits. - long upperBit = upper >>> 63; - long upperShift = upperBit + 9; - long significand2 = upper >>> upperShift; - - // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of - // 10^q. Our goal is to compute f0 and f1 such that: - // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0 - // - when q < 0: 10^q = (2^(f1 - q) / 5^-q) * 2^f1 - // Both (5^q / 2^(f0 - q)) and (2^(f1 - q) / 5^-q) must fall within the range of [1, 2). - // It turns out that these conditions are met when: - // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16. - // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16. 
- // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16. - // - // Furthermore, we need to factor in the following normalizations we've performed: - // - shifting the decimal significand left bitwise - // - shifting the binary significand right bitwise if the most significant bit of the product was 1 - // Therefore, we add (63 - lz + upperBit) to the exponent. - long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit; - if (exp2 < IEEE64_MIN_FINITE_NUMBER_EXPONENT) { - // In the next step, we right-shift the binary significand by the difference between the minimum exponent - // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive. - // Thus, we need to handle the case when the distance is >= 64 separately and always return zero. - if (exp2 <= IEEE64_MIN_FINITE_NUMBER_EXPONENT - 64) { - return zero(negative); - } + short parseShort(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - // In this branch, it is likely that we are handling a subnormal number. Therefore, we adjust the significand - // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE64_EXPONENT_BIAS)) / 2^52. - significand2 >>= 1 - IEEE64_EXPONENT_BIAS - exp2; - // Round up if the significand is odd and remove the least significant bit that we've left for rounding. - significand2 += significand2 & 1; - significand2 >>= 1; - - // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after - // rounding up. For example, when we are parsing 2.2250738585072013e-308 before rounding and removing the - // least significant bit significand2 = 0x3fffffffffffff and exp2 = -1023. After rounding, we get - // significand2 = 0x10000000000000, which is the significand of the smallest normal number. - exp2 = (significand2 < (1L << 52)) ? 
IEEE64_SUBNORMAL_EXPONENT : IEEE64_MIN_FINITE_NUMBER_EXPONENT; - return toDouble(negative, significand2, exp2); - } - - // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway - // between two integers. To understand the rationale behind the condition used to identify this case, refer to - // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second". - if ((compareUnsigned(lower, 1) <= 0) && (exp10 >= -4) && (exp10 <= 23) && ((significand2 & 3) == 1)) { - if (significand2 << upperShift == upper) { - significand2 &= ~1; - } - } + int currentIdx = negative ? offset + 1 : offset; - // Round up if the significand is odd and remove the least significant bit that we've left for rounding. - significand2 += significand2 & 1; - significand2 >>= 1; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); + } - if (significand2 == (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { - // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand - // by 2 and update the exponent accordingly. - significand2 >>= 1; - exp2++; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + if (isOutOfShortRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of short range ([" + Short.MIN_VALUE + ", " + Short.MAX_VALUE + "])."); } + return (short) (negative ? 
(~digits + 1) : digits); + } - if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { - return infinity(negative); + private static boolean isOutOfShortRange(boolean negative, long digits, int digitCount) { + if (digitCount < SHORT_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > SHORT_MAX_DIGIT_COUNT) { + return true; + } + if (negative) { + return digits > SHORT_MAX_ABS_VALUE; } - return toDouble(negative, significand2, exp2); + return digits > Short.MAX_VALUE; } - // The following parser is based on the idea described in - // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in - // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp - private double slowlyParseDouble(byte[] buffer, int offset) { - SlowPathDecimal decimal = slowPathDecimal; - decimal.reset(); + int parseInt(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - decimal.negative = buffer[offset] == '-'; - currentIdx = decimal.negative ? offset + 1 : offset; - long exp10 = 0; + int currentIdx = negative ? offset + 1 : offset; - skipZeros(buffer); - parseDigits(buffer, decimal); - if (buffer[currentIdx] == '.') { - currentIdx++; - int firstIdxAfterPeriod = currentIdx; - if (decimal.digitCount == 0) { - skipZeros(buffer); - } - parseDigits(buffer, decimal); - exp10 = firstIdxAfterPeriod - currentIdx; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. 
Minus has to be followed by a digit."); } - - int currentIdxMovingBackwards = currentIdx - 1; - int trailingZeros = 0; - // Here, we also skip the period to handle cases like 100000000000000000000.000000 - while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') { - if (buffer[currentIdxMovingBackwards] == '0') { - trailingZeros++; - } - currentIdxMovingBackwards--; + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - exp10 += decimal.digitCount; - decimal.digitCount -= trailingZeros; - if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { - decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT; - decimal.truncated = true; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - - if (isExponentIndicator(buffer[currentIdx])) { - currentIdx++; - exp10 = parseExponent(buffer, exp10); - } - - // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1. - if (exp10 <= -324) { - // We know that -1e-324 < w * 10^exp10 < 1e-324. In binary64 -1e-324 = -0.0 and 1e-324 = +0.0, so we can - // safely return +/-0.0. - return zero(decimal.negative); - } else if (exp10 >= 310) { - // We know that either w * 10^exp10 <= -0.1e310 or w * 10^exp10 >= 0.1e310. - // In binary64 -0.1e310 = -inf and 0.1e310 = +inf, so we can safely return +/-inf. - return infinity(decimal.negative); - } - - decimal.exp10 = (int) exp10; - int exp2 = 0; - - // We start the following loop with the decimal in the form of w * 10^exp10. After a series of - // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10, - // where exp10 is <= 0. Resultantly, w' * 10^exp10 is in the range of [0, 1). 
- while (decimal.exp10 > 0) { - int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10); - decimal.shiftRight(shift); - exp2 += shift; - } - - // Now, we are left-shifting to get to the point where w'' * 10^exp10 is within the range of [1/2, 1). - while (decimal.exp10 <= 0) { - int shift; - if (decimal.exp10 == 0) { - if (decimal.digits[0] >= 5) { - break; - } - shift = (decimal.digits[0] < 2) ? 2 : 1; - } else { - shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10); - } - decimal.shiftLeft(shift); - exp2 -= shift; + if (isOutOfIntRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of int range ([" + Integer.MIN_VALUE + ", " + Integer.MAX_VALUE + "])."); } + return (int) (negative ? (~digits + 1) : digits); + } - // Here, w'' * 10^exp10 falls within the range of [1/2, 1). In binary64, the significand must be within the - // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the decimal - // is represented as w'' * 10^exp10 * 2^exp2, where w'' * 10^exp10 is in the range of [1, 2). - exp2--; - - while (IEEE64_MIN_FINITE_NUMBER_EXPONENT > exp2) { - int n = IEEE64_MIN_FINITE_NUMBER_EXPONENT - exp2; - if (n > SLOW_PATH_MAX_SHIFT) { - n = SLOW_PATH_MAX_SHIFT; - } - decimal.shiftRight(n); - exp2 += n; + private static boolean isOutOfIntRange(boolean negative, long digits, int digitCount) { + if (digitCount < INT_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > INT_MAX_DIGIT_COUNT) { + return true; + } + if (negative) { + return digits > INT_MAX_ABS_VALUE; } + return digits > Integer.MAX_VALUE; + } - // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^52, 2^53). Hence, - // we perform the following multiplication. If, after this step, the significand is less than 2^52, we have a - // subnormal number, which we will address later. 
- decimal.shiftLeft(IEEE64_SIGNIFICAND_SIZE_IN_BITS); + long parseLong(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int currentIdx = negative ? offset + 1 : offset; - long significand2 = decimal.computeSignificand(); - if (significand2 >= (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { - // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand - // by 2 and update the exponent accordingly. - significand2 >>= 1; - exp2++; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - if (significand2 < (1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT)) { - exp2 = IEEE64_SUBNORMAL_EXPONENT; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { - return infinity(decimal.negative); + if (isOutOfLongRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of long range ([" + Long.MIN_VALUE + ", " + Long.MAX_VALUE + "])."); } - return toDouble(decimal.negative, significand2, exp2); + return negative ? (~digits + 1) : digits; } - private static int resolveShiftDistanceBasedOnExponent10(int exp10) { - return (exp10 < SLOW_PATH_SHIFTS.length) ? 
SLOW_PATH_SHIFTS[exp10] : SLOW_PATH_MAX_SHIFT; - } + float parseFloat(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - private long parseExponent(byte[] buffer, long exponent) { - boolean negative = '-' == buffer[currentIdx]; - if (negative || '+' == buffer[currentIdx]) { - currentIdx++; + int currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); } - int exponentStartIdx = currentIdx; - long parsedExponent = parseDigits(buffer, 0); - if (exponentStartIdx == currentIdx) { - throw new JsonParsingException("Invalid number. Exponent indicator has to be followed by a digit."); - } - // Long.MAX_VALUE = 9223372036854775807 (19 digits). Therefore, any number with <= 18 digits can be safely - // stored in a long without causing an overflow. - int maxDigitCountLongCanAccommodate = 18; - if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { - // Potentially, we have an overflow here. We try to skip leading zeros. - while (buffer[exponentStartIdx] == '0') { - exponentStartIdx++; - } - if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { - // We still have more digits than a long can safely accommodate. - // - // The largest finite number that can be represented in binary64 is (1-2^-53) * 2^1024, which is about - // 1.798e308, and the smallest non-zero number is 2^-1074, roughly 4.941e-324. So, we might, potentially, - // care only about numbers with explicit exponents falling within the range of [-324, 308], and return - // either zero or infinity for everything outside of this range.However, we have to take into account - // the fractional part of the parsed number. 
This part can potentially cancel out the value of the - // explicit exponent. For example, 1000e-325 (1 * 10^3 * 10^-325 = 1 * 10^-322) is not equal to zero - // despite the explicit exponent being less than -324. - // - // Let's consider a scenario where the explicit exponent is greater than 999999999999999999. As long as - // the fractional part has <= 999999999999999690 digits, it doesn't matter whether we take - // 999999999999999999 or its actual value as the explicit exponent. This is due to the fact that the - // parsed number is infinite anyway (w * 10^-q * 10^999999999999999999 > (1-2^-53) * 2^1024, 0 < w < 10, - // 0 <= q <= 999999999999999690). Similarly, in a scenario where the explicit exponent is less than - // -999999999999999999, as long as the fractional part has <= 999999999999999674 digits, we can safely - // take 999999999999999999 as the explicit exponent, given that the parsed number is zero anyway - // (w * 10^q * 10^-999999999999999999 < 2^-1074, 0 < w < 10, 0 <= q <= 999999999999999674) - // - // Note that if the fractional part had 999999999999999674 digits, the JSON size would need to be - // 999999999999999674 bytes, which is approximately ~888 PiB. Consequently, it's reasonable to assume - // that the fractional part contains no more than 999999999999999674 digits. - parsedExponent = 999999999999999999L; - } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - // Note that we don't check if 'exponent' has overflowed after the following addition. This is because we - // know that the parsed exponent falls within the range of [-999999999999999999, 999999999999999999]. We also - // assume that 'exponent' before the addition is within the range of [-9223372036854775808, 9223372036854775807]. - // This assumption should always be valid as the value of 'exponent' is constrained by the size of the JSON input. - exponent += negative ? 
-parsedExponent : parsedExponent; - return exponent; - } - private long parseDigits(byte[] buffer, long digits) { - byte digit = convertCharacterToDigit(buffer[currentIdx]); - while (digit >= 0 && digit <= 9) { - digits = 10 * digits + digit; + long exponent = 0; + boolean floatingPointNumber = false; + if ('.' == buffer[currentIdx]) { + floatingPointNumber = true; currentIdx++; - digit = convertCharacterToDigit(buffer[currentIdx]); + int firstIdxAfterPeriod = currentIdx; + digitsParsingResult = parseDigits(buffer, currentIdx, digitsParsingResult.digits()); + currentIdx = digitsParsingResult.currentIdx(); + exponent = firstIdxAfterPeriod - currentIdx; + if (exponent == 0) { + throw new JsonParsingException("Invalid number. Decimal point has to be followed by a digit."); + } + digitCount = currentIdx - digitsStartIdx; } - return digits; - } - - private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { - if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { - return false; + if (isExponentIndicator(buffer[currentIdx])) { + floatingPointNumber = true; + currentIdx++; + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); } - int start = startDigitsIdx; - while (buffer[start] == '0' || buffer[start] == '.') { - start++; + if (!floatingPointNumber) { + throw new JsonParsingException("Invalid floating-point number. 
Fraction or exponent part is missing."); } - int significantDigitCount = digitCount - (start - startDigitsIdx); - return significantDigitCount > FAST_PATH_MAX_DIGIT_COUNT; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + + return floatParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digitsParsingResult.digits(), exponent); } - private void skipZeros(byte[] buffer) { - while (buffer[currentIdx] == '0') { - currentIdx++; + double parseDouble(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - } - private void parseDigits(byte[] buffer, SlowPathDecimal decimal) { - while (isDigit(buffer[currentIdx])) { - if (decimal.digitCount < SLOW_PATH_MAX_DIGIT_COUNT) { - decimal.digits[decimal.digitCount] = convertCharacterToDigit(buffer[currentIdx]); + long exponent = 0; + boolean floatingPointNumber = false; + if ('.' == buffer[currentIdx]) { + floatingPointNumber = true; + currentIdx++; + int firstIdxAfterPeriod = currentIdx; + digitsParsingResult = parseDigits(buffer, currentIdx, digitsParsingResult.digits()); + currentIdx = digitsParsingResult.currentIdx(); + exponent = firstIdxAfterPeriod - currentIdx; + if (exponent == 0) { + throw new JsonParsingException("Invalid number. 
Decimal point has to be followed by a digit."); } - decimal.digitCount++; + digitCount = currentIdx - digitsStartIdx; + } + if (isExponentIndicator(buffer[currentIdx])) { + floatingPointNumber = true; currentIdx++; + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); + } + if (!floatingPointNumber) { + throw new JsonParsingException("Invalid floating-point number. Fraction or exponent part is missing."); + } + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - } - - private static boolean isDigit(byte b) { - return b >= '0' && b <= '9'; - } - - private static boolean isExponentIndicator(byte b) { - return 'e' == b || 'E' == b; - } - private static double toDouble(boolean negative, long significand2, long exp2) { - long bits = significand2; - bits &= ~(1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT); // clear the implicit bit - bits |= (exp2 + IEEE64_EXPONENT_BIAS) << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT; - bits = negative ? (bits | (1L << IEEE64_SIGN_BIT_INDEX)) : bits; - return longBitsToDouble(bits); + return doubleParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digitsParsingResult.digits(), exponent); } - private static double infinity(boolean negative) { - return negative ? NEGATIVE_INFINITY : POSITIVE_INFINITY; + private static boolean isOutOfLongRange(boolean negative, long digits, int digitCount) { + if (digitCount < LONG_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > LONG_MAX_DIGIT_COUNT) { + return true; + } + if (negative && digits == Long.MIN_VALUE) { + // The maximum value we can store in a long is 9223372036854775807. When we try to store 9223372036854775808, + // a long wraps around, resulting in -9223372036854775808 (Long.MIN_VALUE). 
If the number we are parsing is + // negative, and we've attempted to store 9223372036854775808 in "digits", we can be sure that we are + // dealing with Long.MIN_VALUE, which obviously does not fall outside the acceptable range. + return false; + } + return digits < 0; } - private static double zero(boolean negative) { - return negative ? -0.0 : 0.0; + private DigitsParsingResult parseDigits(byte[] buffer, int currentIdx, long digits) { + byte digit = convertCharacterToDigit(buffer[currentIdx]); + while (digit >= 0 && digit <= 9) { + digits = 10 * digits + digit; + currentIdx++; + digit = convertCharacterToDigit(buffer[currentIdx]); + } + return digitsParsingResult.of(digits, currentIdx); } private static byte convertCharacterToDigit(byte b) { return (byte) (b - '0'); } - private static class SlowPathDecimal { + private static class DigitsParsingResult { - final byte[] digits = new byte[SLOW_PATH_MAX_DIGIT_COUNT]; - int digitCount; - int exp10; - boolean truncated; - boolean negative; + private long digits; + private int currentIdx; - // Before calling this method we have to make sure that the significand is within the range of [0, 2^53 - 1]. - long computeSignificand() { - if (digitCount == 0 || exp10 < 0) { - return 0; - } - long significand = 0; - for (int i = 0; i < exp10; i++) { - significand = (10 * significand) + ((i < digitCount) ? digits[i] : 0); - } - boolean roundUp = false; - if (exp10 < digitCount) { - roundUp = digits[exp10] >= 5; - if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { - // If the digits haven't been truncated, then we are exactly halfway between two integers. In such - // cases, we round to even, otherwise we round up. - roundUp = truncated || (significand & 1) == 1; - } - } - return roundUp ? 
++significand : significand; + DigitsParsingResult of(long digits, int currentIdx) { + this.digits = digits; + this.currentIdx = currentIdx; + return this; } - void shiftLeft(int shift) { - if (digitCount == 0) { - return; - } - - int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); - int readIndex = digitCount - 1; - int writeIndex = digitCount - 1 + numberOfAdditionalDigits; - long n = 0; - - while (readIndex >= 0) { - n += (long) digits[readIndex] << shift; - long quotient = divideUnsigned(n, 10); - long remainder = remainderUnsigned(n, 10); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex] = (byte) remainder; - } else if (remainder > 0) { - truncated = true; - } - n = quotient; - writeIndex--; - readIndex--; - } - - while (compareUnsigned(n, 0) > 0) { - long quotient = divideUnsigned(n, 10); - long remainder = remainderUnsigned(n, 10); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex] = (byte) remainder; - } else if (remainder > 0) { - truncated = true; - } - n = quotient; - writeIndex--; - } - digitCount += numberOfAdditionalDigits; - if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { - digitCount = SLOW_PATH_MAX_DIGIT_COUNT; - } - exp10 += numberOfAdditionalDigits; - trimTrailingZeros(); - } - - // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts - private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { - int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; - int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; - int newDigitCount = a >> 11; - int pow5OffsetA = 0x7FF & a; - int pow5OffsetB = 0x7FF & b; - - int n = pow5OffsetB - pow5OffsetA; - for (int i = 0; i < n; i++) { - if (i >= digitCount) { - return newDigitCount - 1; - } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { - return newDigitCount - 1; - } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { - return newDigitCount; - } - } - return 
newDigitCount; - } - - void shiftRight(int shift) { - int readIndex = 0; - int writeIndex = 0; - long n = 0; - - while ((n >>> shift) == 0) { - if (readIndex < digitCount) { - n = (10 * n) + digits[readIndex++]; - } else if (n == 0) { - return; - } else { - while ((n >>> shift) == 0) { - n = 10 * n; - readIndex++; - } - break; - } - } - exp10 -= (readIndex - 1); - long mask = (1L << shift) - 1; - while (readIndex < digitCount) { - byte newDigit = (byte) (n >>> shift); - n = (10 * (n & mask)) + digits[readIndex++]; - digits[writeIndex++] = newDigit; - } - while (compareUnsigned(n, 0) > 0) { - byte newDigit = (byte) (n >>> shift); - n = 10 * (n & mask); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex++] = newDigit; - } else if (newDigit > 0) { - truncated = true; - } - } - digitCount = writeIndex; - trimTrailingZeros(); - } - - private void trimTrailingZeros() { - while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { - digitCount--; - } + long digits() { + return digits; } - private void reset() { - digitCount = 0; - exp10 = 0; - truncated = false; + int currentIdx() { + return currentIdx; } } } diff --git a/src/main/java/org/simdjson/NumberParserTables.java b/src/main/java/org/simdjson/NumberParserTables.java index 58f1fa0..3cd8751 100644 --- a/src/main/java/org/simdjson/NumberParserTables.java +++ b/src/main/java/org/simdjson/NumberParserTables.java @@ -73,6 +73,8 @@ class NumberParserTables { 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5 }; + static final int MIN_POWER_OF_FIVE = -342; + static final long[] POWERS_OF_FIVE = { 0xeef453d6923bd65aL, 0x113faa2906a13b3fL, 0x9558b4661b6565f8L, 0x4ac7ca59a424c507L, diff --git a/src/main/java/org/simdjson/OnDemandJsonIterator.java b/src/main/java/org/simdjson/OnDemandJsonIterator.java new file mode 100644 index 0000000..5376504 --- /dev/null +++ b/src/main/java/org/simdjson/OnDemandJsonIterator.java @@ -0,0 +1,675 @@ +package org.simdjson; + +import java.util.Arrays; + +import static 
org.simdjson.CharacterUtils.isStructuralOrWhitespace; + +class OnDemandJsonIterator { + + private static final byte SPACE = 0x20; + private static final int[] SKIP_DEPTH_PER_CHARACTER = new int[127]; + + static { + Arrays.fill(SKIP_DEPTH_PER_CHARACTER, 0); + SKIP_DEPTH_PER_CHARACTER['['] = 1; + SKIP_DEPTH_PER_CHARACTER['{'] = 1; + SKIP_DEPTH_PER_CHARACTER[']'] = -1; + SKIP_DEPTH_PER_CHARACTER['}'] = -1; + } + + private final BitIndexes indexer; + private final int padding; + private final StringParser stringParser = new StringParser(); + private final NumberParser numberParser = new NumberParser(); + + private byte[] buffer; + private int len; + private int depth; + + OnDemandJsonIterator(BitIndexes indexer, int padding) { + this.indexer = indexer; + this.padding = padding; + } + + void init(byte[] buffer, int len) { + if (indexer.isEnd()) { + throw new JsonParsingException("No structural element found."); + } + this.buffer = buffer; + this.len = len; + this.depth = 1; + } + + void skipChild() { + skipChild(depth - 1); + } + + void skipChild(int parentDepth) { + if (depth <= parentDepth) { + return; + } + int idx = indexer.getAndAdvance(); + byte character = buffer[idx]; + + switch (character) { + case '[', '{', ':', ',': + break; + case '"': + if (buffer[indexer.peek()] == ':') { + indexer.advance(); // skip ':' + break; + } + default: + depth--; + if (depth <= parentDepth) { + return; + } + } + + while (indexer.hasNext()) { + idx = indexer.getAndAdvance(); + character = buffer[idx]; + + int delta = SKIP_DEPTH_PER_CHARACTER[character]; + depth += delta; + if (delta < 0 && depth <= parentDepth) { + return; + } + } + + throw new JsonParsingException("Not enough close braces."); + } + + Boolean getRootNonNullBoolean() { + int idx = indexer.getAndAdvance(); + Boolean result = switch (buffer[idx]) { + case 't' -> visitRootTrueAtom(idx); + case 'f' -> visitRootFalseAtom(idx); + default -> throw new JsonParsingException("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + }; + assertNoMoreJsonValues(); + depth--; + return result; + } + + Boolean getRootBoolean() { + int idx = indexer.getAndAdvance(); + Boolean result = switch (buffer[idx]) { + case 't' -> visitRootTrueAtom(idx); + case 'f' -> visitRootFalseAtom(idx); + case 'n' -> { + visitRootNullAtom(idx); + yield null; + } + default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true', 'false' or 'null'."); + }; + assertNoMoreJsonValues(); + depth--; + return result; + } + + private Boolean visitRootTrueAtom(int idx) { + boolean valid = idx + 4 <= len && isTrue(idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'."); + } + return Boolean.TRUE; + } + + private Boolean visitRootFalseAtom(int idx) { + boolean valid = idx + 5 <= len && isFalse(idx) && (idx + 5 == len || isStructuralOrWhitespace(buffer[idx + 5])); + if (!valid) { + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'false'."); + } + return Boolean.FALSE; + } + + private void visitRootNullAtom(int idx) { + boolean valid = idx + 4 <= len && isNull(idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'null'."); + } + } + + private void visitNullAtom(int idx) { + if (!isNull(idx)) { + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'null'."); + } + } + + private boolean isNull(int idx) { + return buffer[idx] == 'n' + && buffer[idx + 1] == 'u' + && buffer[idx + 2] == 'l' + && buffer[idx + 3] == 'l'; + } + + Boolean getNonNullBoolean() { + int idx = indexer.getAndAdvance(); + Boolean result = switch (buffer[idx]) { + case 't' -> visitTrueAtom(idx); + case 'f' -> visitFalseAtom(idx); + default -> throw new JsonParsingException("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + }; + depth--; + return result; + } + + Boolean getBoolean() { + int idx = indexer.getAndAdvance(); + Boolean result = switch (buffer[idx]) { + case 't' -> visitTrueAtom(idx); + case 'f' -> visitFalseAtom(idx); + case 'n' -> { + visitNullAtom(idx); + yield null; + } + default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true', 'false' or 'null'."); + }; + depth--; + return result; + } + + private Boolean visitTrueAtom(int idx) { + boolean valid = isTrue(idx) && isStructuralOrWhitespace(buffer[idx + 4]); + if (!valid) { + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'."); + } + return Boolean.TRUE; + } + + private boolean isTrue(int idx) { + return buffer[idx] == 't' + && buffer[idx + 1] == 'r' + && buffer[idx + 2] == 'u' + && buffer[idx + 3] == 'e'; + } + + private Boolean visitFalseAtom(int idx) { + boolean valid = isFalse(idx) && isStructuralOrWhitespace(buffer[idx + 5]); + if (!valid) { + throw new JsonParsingException("Invalid value starting at " + idx + ". 
Expected 'false'."); + } + return Boolean.FALSE; + } + + private boolean isFalse(int idx) { + return buffer[idx] == 'f' + && buffer[idx + 1] == 'a' + && buffer[idx + 2] == 'l' + && buffer[idx + 3] == 's' + && buffer[idx + 4] == 'e'; + } + + byte getRootNonNullByte() { + depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + byte value = numberParser.parseByte(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Byte getRootByte() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + byte value = numberParser.parseByte(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + byte getNonNullByte() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseByte(buffer, len, idx); + } + + Byte getByte() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseByte(buffer, len, idx); + } + + short getRootNonNullShort() { + depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + short value = numberParser.parseShort(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Short getRootShort() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + short value = numberParser.parseShort(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + short getNonNullShort() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseShort(buffer, len, idx); + } + + Short getShort() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseShort(buffer, len, idx); + } + + int getRootNonNullInt() { + 
depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + int value = numberParser.parseInt(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Integer getRootInt() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + int value = numberParser.parseInt(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Integer getInt() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseInt(buffer, len, idx); + } + + int getNonNullInt() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseInt(buffer, len, idx); + } + + long getRootNonNullLong() { + depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + long value = numberParser.parseLong(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Long getRootLong() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + long value = numberParser.parseLong(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + long getNonNullLong() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseLong(buffer, len, idx); + } + + Long getLong() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseLong(buffer, len, idx); + } + + float getRootNonNullFloat() { + depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + float value = numberParser.parseFloat(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Float getRootFloat() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + 
visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + float value = numberParser.parseFloat(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + double getRootNonNullDouble() { + depth--; + int idx = indexer.getAndAdvance(); + byte[] copy = padRootNumber(idx); + double value = numberParser.parseDouble(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + Double getRootDouble() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + assertNoMoreJsonValues(); + return null; + } + byte[] copy = padRootNumber(idx); + double value = numberParser.parseDouble(copy, len, 0); + assertNoMoreJsonValues(); + return value; + } + + private byte[] padRootNumber(int idx) { + int remainingLen = len - idx; + byte[] copy = new byte[remainingLen + padding]; + System.arraycopy(buffer, idx, copy, 0, remainingLen); + Arrays.fill(copy, remainingLen, remainingLen + padding, SPACE); + return copy; + } + + double getNonNullDouble() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseDouble(buffer, len, idx); + } + + Double getDouble() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseDouble(buffer, len, idx); + } + + float getNonNullFloat() { + depth--; + int idx = indexer.getAndAdvance(); + return numberParser.parseFloat(buffer, len, idx); + } + + Float getFloat() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + return null; + } + return numberParser.parseFloat(buffer, len, idx); + } + + int getRootString(byte[] stringBuffer) { + depth--; + int idx = indexer.getAndAdvance(); + int len = switch (buffer[idx]) { + case '"' -> stringParser.parseString(buffer, idx, stringBuffer); + case 'n' -> { + visitRootNullAtom(idx); + yield -1; + } + default -> throw new 
JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'."); + }; + assertNoMoreJsonValues(); + return len; + } + + int getString(byte[] stringBuffer) { + depth--; + int idx = indexer.getAndAdvance(); + return switch (buffer[idx]) { + case '"' -> stringParser.parseString(buffer, idx, stringBuffer); + case 'n' -> { + visitNullAtom(idx); + yield -1; + } + default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'."); + }; + } + + char getNonNullChar() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == '"') { + return stringParser.parseChar(buffer, idx); + } + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected string."); + } + + Character getChar() { + depth--; + int idx = indexer.getAndAdvance(); + return switch (buffer[idx]) { + case '"' -> stringParser.parseChar(buffer, idx); + case 'n' -> { + visitNullAtom(idx); + yield null; + } + default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'."); + }; + } + + char getRootNonNullChar() { + depth--; + int idx = indexer.getAndAdvance(); + if (buffer[idx] == '"') { + char character = stringParser.parseChar(buffer, idx); + assertNoMoreJsonValues(); + return character; + } + throw new JsonParsingException("Invalid value starting at " + idx + ". Expected string."); + } + + Character getRootChar() { + depth--; + int idx = indexer.getAndAdvance(); + Character character = switch (buffer[idx]) { + case '"' -> stringParser.parseChar(buffer, idx); + case 'n' -> { + visitRootNullAtom(idx); + yield null; + } + default -> throw new JsonParsingException("Invalid value starting at " + idx + ". 
Expected either string or 'null'."); + }; + assertNoMoreJsonValues(); + return character; + } + + IteratorResult startIteratingArray() { + int idx = indexer.peek(); + if (buffer[idx] == 'n') { + visitNullAtom(idx); + indexer.advance(); + depth--; + return IteratorResult.NULL; + } + if (buffer[idx] != '[') { + throw unexpectedCharException(idx, '['); + } + idx = indexer.advanceAndGet(); + if (buffer[idx] == ']') { + indexer.advance(); + depth--; + return IteratorResult.EMPTY; + } + depth++; + return IteratorResult.NOT_EMPTY; + } + + IteratorResult startIteratingRootArray() { + int idx = indexer.peek(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + indexer.advance(); + depth--; + return IteratorResult.NULL; + } + if (buffer[idx] != '[') { + throw unexpectedCharException(idx, '['); + } + if (buffer[indexer.getLast()] != ']') { + throw new JsonParsingException("Unclosed array. Missing ']' for starting '['."); + } + idx = indexer.advanceAndGet(); + if (buffer[idx] == ']') { + indexer.advance(); + depth--; + assertNoMoreJsonValues(); + return IteratorResult.EMPTY; + } + depth++; + return IteratorResult.NOT_EMPTY; + } + + boolean nextArrayElement() { + int idx = indexer.getAndAdvance(); + if (buffer[idx] == ']') { + depth--; + return false; + } else if (buffer[idx] == ',') { + depth++; + return true; + } else { + throw new JsonParsingException("Missing comma between array values"); + } + } + + IteratorResult startIteratingObject() { + int idx = indexer.peek(); + if (buffer[idx] == 'n') { + visitNullAtom(idx); + indexer.advance(); + depth--; + return IteratorResult.NULL; + } + if (buffer[idx] != '{') { + throw unexpectedCharException(idx, '{'); + } + idx = indexer.advanceAndGet(); + if (buffer[idx] == '}') { + indexer.advance(); + depth--; + return IteratorResult.EMPTY; + } + return IteratorResult.NOT_EMPTY; + } + + IteratorResult startIteratingRootObject() { + int idx = indexer.peek(); + if (buffer[idx] == 'n') { + visitRootNullAtom(idx); + indexer.advance(); + 
depth--; + return IteratorResult.NULL; + } + if (buffer[idx] != '{') { + throw unexpectedCharException(idx, '{'); + } + if (buffer[indexer.getLast()] != '}') { + throw new JsonParsingException("Unclosed object. Missing '}' for starting '{'."); + } + idx = indexer.advanceAndGet(); + if (buffer[idx] == '}') { + indexer.advance(); + depth--; + assertNoMoreJsonValues(); + return IteratorResult.EMPTY; + } + return IteratorResult.NOT_EMPTY; + } + + boolean nextObjectField() { + int idx = indexer.getAndAdvance(); + byte character = buffer[idx]; + if (character == '}') { + depth--; + return false; + } else if (character == ',') { + return true; + } else { + throw unexpectedCharException(idx, ','); + } + } + + void moveToFieldValue() { + int idx = indexer.getAndAdvance(); + if (buffer[idx] != ':') { + throw unexpectedCharException(idx, ':'); + } + depth++; + } + + int getFieldName(byte[] stringBuffer) { + int idx = indexer.getAndAdvance(); + if (buffer[idx] != '"') { + throw unexpectedCharException(idx, '"'); + } + return stringParser.parseString(buffer, idx, stringBuffer); + } + + int getDepth() { + return depth; + } + + private JsonParsingException unexpectedCharException(int idx, char expected) { + if (indexer.isPastEnd()) { + return new JsonParsingException("Expected '" + expected + "' but reached end of buffer."); + } else { + return new JsonParsingException("Expected '" + expected + "' but got: '" + (char) buffer[idx] + "'."); + } + } + + void assertNoMoreJsonValues() { + if (indexer.hasNext()) { + throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + } + + enum IteratorResult { + EMPTY, NULL, NOT_EMPTY + } +} diff --git a/src/main/java/org/simdjson/ResolvedClass.java b/src/main/java/org/simdjson/ResolvedClass.java new file mode 100644 index 0000000..67c6887 --- /dev/null +++ b/src/main/java/org/simdjson/ResolvedClass.java @@ -0,0 +1,165 @@ +package org.simdjson; + +import 
org.simdjson.annotations.JsonFieldName; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Modifier; +import java.lang.reflect.Parameter; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.nio.charset.StandardCharsets; +import java.util.List; + +class ResolvedClass { + + enum ResolvedClassCategory { + BOOLEAN_PRIMITIVE(boolean.class, new boolean[0]), + BOOLEAN(Boolean.class, new Boolean[0]), + BYTE_PRIMITIVE(byte.class, new byte[0]), + BYTE(Byte.class, new Byte[0]), + CHAR_PRIMITIVE(char.class, new char[0]), + CHAR(Character.class, new Character[0]), + SHORT_PRIMITIVE(short.class, new short[0]), + SHORT(Short.class, new Short[0]), + INT_PRIMITIVE(int.class, new int[0]), + INT(Integer.class, new Integer[0]), + LONG_PRIMITIVE(long.class, new long[0]), + LONG(Long.class, new Long[0]), + DOUBLE_PRIMITIVE(double.class, new double[0]), + DOUBLE(Double.class, new Double[0]), + FLOAT_PRIMITIVE(float.class, new float[0]), + FLOAT(Float.class, new Float[0]), + STRING(String.class, new String[0]), + CUSTOM(null, null), + ARRAY(null, null), + LIST(List.class, null); + + private final Class cclass; + private final Object emptyArray; + + ResolvedClassCategory(Class cclass, Object emptyArray) { + this.cclass = cclass; + this.emptyArray = emptyArray; + } + + Object getEmptyArray() { + return emptyArray; + } + } + + private final ResolvedClassCategory classCategory; + private final Class rawClass; + private final ResolvedClass elementClass; + private final Constructor constructor; + private final ConstructorArgumentsMap argumentsMap; + + ResolvedClass(Type targetType, ClassResolver classResolver) { + if (targetType instanceof ParameterizedType parameterizedType) { + rawClass = (Class) parameterizedType.getRawType(); + elementClass = resolveElementClass(parameterizedType, classResolver); + } else { + rawClass = (Class) targetType; + elementClass = resolveElementClass(rawClass, classResolver); + } + + classCategory = 
resolveClassType(rawClass); + if (classCategory == ResolvedClassCategory.CUSTOM) { + checkIfCustomClassIsSupported(rawClass); + constructor = rawClass.getDeclaredConstructors()[0]; + constructor.setAccessible(true); + Parameter[] parameters = constructor.getParameters(); + argumentsMap = new ConstructorArgumentsMap(parameters.length); + for (int i = 0; i < parameters.length; i++) { + Type parameterType = parameters[i].getAnnotatedType().getType(); + String fieldName = resolveFieldName(parameters[i], rawClass); + byte[] fieldNameBytes = fieldName.getBytes(StandardCharsets.UTF_8); + argumentsMap.put(fieldNameBytes, new ConstructorArgument(i, classResolver.resolveClass(parameterType))); + } + } else { + constructor = null; + argumentsMap = null; + } + } + + private static ResolvedClass resolveElementClass(ParameterizedType parameterizedType, ClassResolver classResolver) { + if (parameterizedType.getRawType() != List.class) { + throw new JsonParsingException("Parametrized types other than java.util.List are not supported."); + } + return classResolver.resolveClass(parameterizedType.getActualTypeArguments()[0]); + } + + private static ResolvedClass resolveElementClass(Class cls, ClassResolver classResolver) { + if (cls == List.class) { + throw new JsonParsingException("Undefined list element type."); + } + if (cls.componentType() != null) { + return classResolver.resolveClass(cls.componentType()); + } else { + return null; + } + } + + private static ResolvedClassCategory resolveClassType(Class cls) { + if (Iterable.class.isAssignableFrom(cls) && cls != List.class) { + throw new JsonParsingException("Unsupported class: " + cls.getName() + + ". For JSON arrays at the root, use Java arrays. 
For inner JSON arrays, use either Java arrays or java.util.List."); + } + if (cls.isArray()) { + return ResolvedClassCategory.ARRAY; + } + for (ResolvedClassCategory t : ResolvedClassCategory.values()) { + if (t.cclass == cls) { + return t; + } + } + return ResolvedClassCategory.CUSTOM; + } + + private static void checkIfCustomClassIsSupported(Class cls) { + int modifiers = cls.getModifiers(); + if (cls.isMemberClass() && !Modifier.isStatic(modifiers)) { + throw new JsonParsingException("Unsupported class: " + cls.getName() + ". Inner non-static classes are not supported."); + } + if (Modifier.isAbstract(modifiers) || Modifier.isInterface(modifiers)) { + throw new JsonParsingException("Unsupported class: " + cls.getName() + ". Interfaces and abstract classes are not supported."); + } + Constructor[] constructors = cls.getDeclaredConstructors(); + if (constructors.length > 1) { + throw new JsonParsingException("Class: " + cls.getName() + " has more than one constructor."); + } + if (constructors.length == 0) { + throw new JsonParsingException("Class: " + cls.getName() + " doesn't have any constructor."); + } + } + + private static String resolveFieldName(Parameter parameter, Class targetClass) { + JsonFieldName annotation = parameter.getAnnotation(JsonFieldName.class); + if (annotation != null) { + return annotation.value(); + } + if (!targetClass.isRecord()) { + throw new JsonParsingException("Some of " + targetClass.getName() + "'s constructor arguments are not annotated with @JsonFieldName."); + } + return parameter.getName(); + } + + ConstructorArgumentsMap getArgumentsMap() { + return argumentsMap; + } + + Constructor getConstructor() { + return constructor; + } + + ResolvedClassCategory getClassCategory() { + return classCategory; + } + + ResolvedClass getElementClass() { + return elementClass; + } + + Class getRawClass() { + return rawClass; + } +} diff --git a/src/main/java/org/simdjson/SchemaBasedJsonIterator.java 
b/src/main/java/org/simdjson/SchemaBasedJsonIterator.java new file mode 100644 index 0000000..b48595d --- /dev/null +++ b/src/main/java/org/simdjson/SchemaBasedJsonIterator.java @@ -0,0 +1,735 @@ +package org.simdjson; + +import org.simdjson.OnDemandJsonIterator.IteratorResult; +import org.simdjson.ResolvedClass.ResolvedClassCategory; + +import java.lang.reflect.Array; +import java.lang.reflect.InvocationTargetException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +import static java.nio.charset.StandardCharsets.UTF_8; + +class SchemaBasedJsonIterator { + + private static final int INITIAL_ARRAY_SIZE = 16; + + private final ClassResolver classResolver; + private final OnDemandJsonIterator jsonIterator; + private final byte[] stringBuffer; + + SchemaBasedJsonIterator(BitIndexes bitIndexes, byte[] stringBuffer, int padding) { + this.jsonIterator = new OnDemandJsonIterator(bitIndexes, padding); + this.classResolver = new ClassResolver(); + this.stringBuffer = stringBuffer; + } + + @SuppressWarnings("unchecked") + T walkDocument(byte[] padded, int len, Class expectedType) { + jsonIterator.init(padded, len); + classResolver.reset(); + + ResolvedClass resolvedExpectedClass = classResolver.resolveClass(expectedType); + return switch (resolvedExpectedClass.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> (T) jsonIterator.getRootNonNullBoolean(); + case BOOLEAN -> (T) jsonIterator.getRootBoolean(); + case BYTE_PRIMITIVE -> (T) Byte.valueOf(jsonIterator.getRootNonNullByte()); + case BYTE -> (T) jsonIterator.getRootByte(); + case SHORT_PRIMITIVE -> (T) Short.valueOf(jsonIterator.getRootNonNullShort()); + case SHORT -> (T) jsonIterator.getRootShort(); + case INT_PRIMITIVE -> (T) Integer.valueOf(jsonIterator.getRootNonNullInt()); + case INT -> (T) jsonIterator.getRootInt(); + case LONG_PRIMITIVE -> (T) Long.valueOf(jsonIterator.getRootNonNullLong()); + case LONG -> (T) jsonIterator.getRootLong(); + case FLOAT_PRIMITIVE -> (T) 
Float.valueOf(jsonIterator.getRootNonNullFloat()); + case FLOAT -> (T) jsonIterator.getRootFloat(); + case DOUBLE_PRIMITIVE -> (T) Double.valueOf(jsonIterator.getRootNonNullDouble()); + case DOUBLE -> (T) jsonIterator.getRootDouble(); + case CHAR_PRIMITIVE -> (T) Character.valueOf(jsonIterator.getRootNonNullChar()); + case CHAR -> (T) jsonIterator.getRootChar(); + case STRING -> (T) getRootString(); + case ARRAY -> (T) getRootArray(resolvedExpectedClass.getElementClass()); + case CUSTOM -> (T) getRootObject(resolvedExpectedClass); + case LIST -> throw new JsonParsingException("Lists at the root are not supported. Consider using an array instead."); + }; + } + + private Object getRootObject(ResolvedClass expectedClass) { + IteratorResult result = jsonIterator.startIteratingRootObject(); + Object object = getObject(expectedClass, result); + jsonIterator.assertNoMoreJsonValues(); + return object; + } + + private Object getObject(ResolvedClass expectedClass) { + IteratorResult result = jsonIterator.startIteratingObject(); + return getObject(expectedClass, result); + } + + private Object getObject(ResolvedClass expectedClass, IteratorResult result) { + if (result == IteratorResult.NOT_EMPTY) { + ConstructorArgumentsMap argumentsMap = expectedClass.getArgumentsMap(); + Object[] args = new Object[argumentsMap.getArgumentCount()]; + int parentDepth = jsonIterator.getDepth() - 1; + collectArguments(argumentsMap, args); + jsonIterator.skipChild(parentDepth); + return createObject(expectedClass, args); + } else if (result == IteratorResult.EMPTY) { + ConstructorArgumentsMap argumentsMap = expectedClass.getArgumentsMap(); + Object[] args = new Object[argumentsMap.getArgumentCount()]; + return createObject(expectedClass, args); + } + return null; + } + + private Object createObject(ResolvedClass expectedClass, Object[] args) { + try { + return expectedClass.getConstructor().newInstance(args); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException 
e) { + throw new JsonParsingException("Failed to construct an instance of " + expectedClass.getRawClass().getName(), e); + } + } + + private void collectArguments(ConstructorArgumentsMap argumentsMap, Object[] args) { + int collected = 0; + int argLen = args.length; + boolean hasFields = true; + while (collected < argLen && hasFields) { + int fieldNameLen = jsonIterator.getFieldName(stringBuffer); + jsonIterator.moveToFieldValue(); + ConstructorArgument argument = argumentsMap.get(stringBuffer, fieldNameLen); + if (argument != null) { + ResolvedClass argumentClass = argument.resolvedClass(); + collectArgument(argumentClass, args, argument); + collected++; + } else { + jsonIterator.skipChild(); + } + hasFields = jsonIterator.nextObjectField(); + } + } + + private void collectArgument(ResolvedClass argumentClass, Object[] args, ConstructorArgument argument) { + args[argument.idx()] = switch (argumentClass.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> jsonIterator.getNonNullBoolean(); + case BOOLEAN -> jsonIterator.getBoolean(); + case BYTE_PRIMITIVE -> jsonIterator.getNonNullByte(); + case BYTE -> jsonIterator.getByte(); + case SHORT_PRIMITIVE -> jsonIterator.getNonNullShort(); + case SHORT -> jsonIterator.getShort(); + case INT_PRIMITIVE -> jsonIterator.getNonNullInt(); + case INT -> jsonIterator.getInt(); + case LONG_PRIMITIVE -> jsonIterator.getNonNullLong(); + case LONG -> jsonIterator.getLong(); + case FLOAT_PRIMITIVE -> jsonIterator.getNonNullFloat(); + case FLOAT -> jsonIterator.getFloat(); + case DOUBLE_PRIMITIVE -> jsonIterator.getNonNullDouble(); + case DOUBLE -> jsonIterator.getDouble(); + case CHAR_PRIMITIVE -> jsonIterator.getNonNullChar(); + case CHAR -> jsonIterator.getChar(); + case STRING -> getString(); + case ARRAY -> getArray(argumentClass.getElementClass()); + case LIST -> getList(argumentClass.getElementClass()); + case CUSTOM -> getObject(argument.resolvedClass()); + }; + } + + private List getList(ResolvedClass elementType) { + 
IteratorResult result = jsonIterator.startIteratingArray(); + if (result == IteratorResult.EMPTY) { + return Collections.emptyList(); + } + if (result == IteratorResult.NULL) { + return null; + } + + List list = new java.util.ArrayList<>(); /* append-only accumulator returned as List; ArrayList avoids LinkedList's per-element node allocation */ + boolean hasElements = true; + + switch (elementType.getClassCategory()) { + case BOOLEAN -> { + while (hasElements) { + list.add(jsonIterator.getBoolean()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case BYTE -> { + while (hasElements) { + list.add(jsonIterator.getByte()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case CHAR -> { + while (hasElements) { + list.add(jsonIterator.getChar()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case SHORT -> { + while (hasElements) { + list.add(jsonIterator.getShort()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case INT -> { + while (hasElements) { + list.add(jsonIterator.getInt()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case LONG -> { + while (hasElements) { + list.add(jsonIterator.getLong()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case DOUBLE -> { + while (hasElements) { + list.add(jsonIterator.getDouble()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case FLOAT -> { + while (hasElements) { + list.add(jsonIterator.getFloat()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case STRING -> { + while (hasElements) { + list.add(getString()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case CUSTOM -> { + while (hasElements) { + list.add(getObject(elementType)); + hasElements = jsonIterator.nextArrayElement(); + } + } + case ARRAY -> { + while (hasElements) { + list.add(getArray(elementType.getElementClass())); + hasElements = jsonIterator.nextArrayElement(); + } + } + case LIST -> { + while (hasElements) { + list.add(getList(elementType.getElementClass())); + hasElements = jsonIterator.nextArrayElement(); + } + } + default -> throw new
JsonParsingException("Unsupported array element type: " + elementType.getRawClass().getName()); + } + + return list; + } + + private Object getRootArray(ResolvedClass elementType) { + IteratorResult result = jsonIterator.startIteratingRootArray(); + Object array = getArray(elementType, result); + jsonIterator.assertNoMoreJsonValues(); + return array; + } + + private Object getArray(ResolvedClass elementType) { + IteratorResult result = jsonIterator.startIteratingArray(); + return getArray(elementType, result); + } + + private Object getArray(ResolvedClass elementType, IteratorResult result) { + if (result == IteratorResult.EMPTY) { + ResolvedClassCategory classCategory = elementType.getClassCategory(); + return classCategory.getEmptyArray() != null ? classCategory.getEmptyArray() : Array.newInstance(elementType.getRawClass(), 0); + } + if (result == IteratorResult.NULL) { + return null; + } + + return switch (elementType.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> getPrimitiveBooleanArray(); + case BOOLEAN -> getBooleanArray(); + case BYTE_PRIMITIVE -> getBytePrimitiveArray(); + case BYTE -> getByteArray(); + case CHAR_PRIMITIVE -> getCharPrimitiveArray(); + case CHAR -> getCharArray(); + case SHORT_PRIMITIVE -> getShortPrimitiveArray(); + case SHORT -> getShortArray(); + case INT_PRIMITIVE -> getIntPrimitiveArray(); + case INT -> getIntArray(); + case LONG_PRIMITIVE -> getLongPrimitiveArray(); + case LONG -> getLongArray(); + case DOUBLE_PRIMITIVE -> getDoublePrimitiveArray(); + case DOUBLE -> getDoubleArray(); + case FLOAT_PRIMITIVE -> getFloatPrimitiveArray(); + case FLOAT -> getFloatArray(); + case STRING -> getStringArray(); + case CUSTOM -> getCustomObjectArray(elementType); + case ARRAY -> getArrayOfArrays(elementType); + case LIST -> throw new JsonParsingException("Arrays of lists are not supported."); + }; + } + + private Object getFloatArray() { + Float[] array = new Float[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while 
(hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Float[] copy = new Float[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getFloat(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Float[] copy = new Float[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getFloatPrimitiveArray() { + float[] array = new float[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + float[] copy = new float[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullFloat(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + float[] copy = new float[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getDoubleArray() { + Double[] array = new Double[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Double[] copy = new Double[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getDouble(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Double[] copy = new Double[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getDoublePrimitiveArray() { + double[] array = new double[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == 
oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + double[] copy = new double[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullDouble(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + double[] copy = new double[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getLongPrimitiveArray() { + long[] array = new long[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + long[] copy = new long[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullLong(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + long[] copy = new long[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getLongArray() { + Long[] array = new Long[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Long[] copy = new Long[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getLong(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Long[] copy = new Long[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getShortPrimitiveArray() { + short[] array = new short[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + short[] 
copy = new short[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullShort(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + short[] copy = new short[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getShortArray() { + Short[] array = new Short[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Short[] copy = new Short[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getShort(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Short[] copy = new Short[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object[] getCustomObjectArray(ResolvedClass elementType) { + Object[] array = (Object[]) Array.newInstance(elementType.getRawClass(), INITIAL_ARRAY_SIZE); + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), newCapacity); + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getObject(elementType); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), size); + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object[] getArrayOfArrays(ResolvedClass elementType) { + Object[] array = (Object[]) Array.newInstance(elementType.getRawClass(), INITIAL_ARRAY_SIZE); + int size = 0; + boolean hasElements = 
true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), newCapacity); + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getArray(elementType.getElementClass()); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), size); + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Integer[] getIntArray() { + Integer[] array = new Integer[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Integer[] copy = new Integer[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getInt(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Integer[] copy = new Integer[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private int[] getIntPrimitiveArray() { + int[] array = new int[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + int[] copy = new int[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullInt(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + int[] copy = new int[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getCharArray() { + Character[] array = new Character[INITIAL_ARRAY_SIZE]; + int size = 0; 
+ boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Character[] copy = new Character[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getChar(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Character[] copy = new Character[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private char[] getCharPrimitiveArray() { + char[] array = new char[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + char[] copy = new char[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullChar(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + char[] copy = new char[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getByteArray() { + Byte[] array = new Byte[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Byte[] copy = new Byte[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getByte(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Byte[] copy = new Byte[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private byte[] getBytePrimitiveArray() { + byte[] array = new byte[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = 
array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + byte[] copy = new byte[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullByte(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + byte[] copy = new byte[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Boolean[] getBooleanArray() { + Boolean[] array = new Boolean[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Boolean[] copy = new Boolean[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getBoolean(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Boolean[] copy = new Boolean[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private boolean[] getPrimitiveBooleanArray() { + boolean[] array = new boolean[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + boolean[] copy = new boolean[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullBoolean(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + boolean[] copy = new boolean[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private String[] getStringArray() { + String[] array = new String[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { 
+ int newCapacity = calculateNewCapacity(oldCapacity); + String[] copy = new String[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getString(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + String[] copy = new String[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private static int calculateNewCapacity(int oldCapacity) { + int minCapacity = oldCapacity + 1; + int newCapacity = oldCapacity + (oldCapacity >> 1); + if (newCapacity - minCapacity < 0) { + newCapacity = minCapacity; + } + return newCapacity; + } + + private String getString() { + int len = jsonIterator.getString(stringBuffer); + if (len == -1) { + return null; + } + return new String(stringBuffer, 0, len, UTF_8); + } + + private String getRootString() { + int len = jsonIterator.getRootString(stringBuffer); + if (len == -1) { + return null; + } + return new String(stringBuffer, 0, len, UTF_8); + } +} diff --git a/src/main/java/org/simdjson/SimdJsonParser.java b/src/main/java/org/simdjson/SimdJsonParser.java index 2ca2d1a..a752bc1 100644 --- a/src/main/java/org/simdjson/SimdJsonParser.java +++ b/src/main/java/org/simdjson/SimdJsonParser.java @@ -11,6 +11,7 @@ public class SimdJsonParser { private final StructuralIndexer indexer; private final BitIndexes bitIndexes; private final JsonIterator jsonIterator; + private final SchemaBasedJsonIterator schemaBasedJsonIterator; private final byte[] paddedBuffer; public SimdJsonParser() { @@ -19,18 +20,28 @@ public SimdJsonParser() { public SimdJsonParser(int capacity, int maxDepth) { bitIndexes = new BitIndexes(capacity); - jsonIterator = new JsonIterator(bitIndexes, capacity, maxDepth, PADDING); + byte[] stringBuffer = new byte[capacity]; + jsonIterator = new JsonIterator(bitIndexes, stringBuffer, capacity, maxDepth, PADDING); + schemaBasedJsonIterator = new SchemaBasedJsonIterator(bitIndexes, stringBuffer, PADDING); 
paddedBuffer = new byte[capacity]; reader = new BlockReader(STEP_SIZE); indexer = new StructuralIndexer(bitIndexes); } + public T parse(byte[] buffer, int len, Class expectedType) { + stage0(buffer); + byte[] padded = padIfNeeded(buffer, len); + reset(padded, len); + stage1(padded); + return schemaBasedJsonIterator.walkDocument(padded, len, expectedType); + } + public JsonValue parse(byte[] buffer, int len) { stage0(buffer); byte[] padded = padIfNeeded(buffer, len); reset(padded, len); stage1(padded); - return stage2(padded, len); + return jsonIterator.walkDocument(padded, len); } private byte[] padIfNeeded(byte[] buffer, int len) { @@ -62,8 +73,4 @@ private void stage1(byte[] buffer) { reader.advance(); indexer.finish(reader.getBlockIndex()); } - - private JsonValue stage2(byte[] buffer, int len) { - return jsonIterator.walkDocument(buffer, len); - } } diff --git a/src/main/java/org/simdjson/StringParser.java b/src/main/java/org/simdjson/StringParser.java index 11fb7fd..c03e7eb 100644 --- a/src/main/java/org/simdjson/StringParser.java +++ b/src/main/java/org/simdjson/StringParser.java @@ -4,7 +4,6 @@ import static org.simdjson.CharacterUtils.escape; import static org.simdjson.CharacterUtils.hexToInt; -import static org.simdjson.Tape.STRING; class StringParser { @@ -16,20 +15,20 @@ class StringParser { private static final int MIN_LOW_SURROGATE = 0xDC00; private static final int MAX_LOW_SURROGATE = 0xDFFF; - private final Tape tape; - private final byte[] stringBuffer; - - private int stringBufferIdx; + int parseString(byte[] buffer, int idx, byte[] stringBuffer, int stringBufferIdx) { + int dst = doParseString(buffer, idx, stringBuffer, stringBufferIdx + Integer.BYTES); + int len = dst - stringBufferIdx - Integer.BYTES; + IntegerUtils.toBytes(len, stringBuffer, stringBufferIdx); + return dst; + } - StringParser(Tape tape, byte[] stringBuffer) { - this.tape = tape; - this.stringBuffer = stringBuffer; + int parseString(byte[] buffer, int idx, byte[] stringBuffer) { 
+ return doParseString(buffer, idx, stringBuffer, 0); } - void parseString(byte[] buffer, int idx) { - tape.append(stringBufferIdx, STRING); + private int doParseString(byte[] buffer, int idx, byte[] stringBuffer, int offset) { int src = idx + 1; - int dst = stringBufferIdx + Integer.BYTES; + int dst = offset; while (true) { ByteVector srcVec = ByteVector.fromArray(StructuralIndexer.BYTE_SPECIES, buffer, src); srcVec.intoArray(stringBuffer, dst); @@ -54,7 +53,7 @@ void parseString(byte[] buffer, int idx) { } else if (codePoint >= MIN_LOW_SURROGATE && codePoint <= MAX_LOW_SURROGATE) { throw new JsonParsingException("Invalid code point. The range U+DC00–U+DFFF is reserved for low surrogate."); } - dst += storeCodePointInStringBuffer(codePoint, dst); + dst += storeCodePointInStringBuffer(codePoint, dst, stringBuffer); } else { stringBuffer[dst + backslashDist] = escape(escapeChar); src += backslashDist + 2; @@ -65,9 +64,49 @@ void parseString(byte[] buffer, int idx) { dst += BYTES_PROCESSED; } } - int len = dst - stringBufferIdx - Integer.BYTES; - IntegerUtils.toBytes(len, stringBuffer, stringBufferIdx); - stringBufferIdx = dst; + return dst; + } + + char parseChar(byte[] buffer, int startIdx) { + int idx = startIdx + 1; + char character; + if (buffer[idx] == '\\') { + byte escapeChar = buffer[idx + 1]; + if (escapeChar == 'u') { + int codePoint = hexToInt(buffer, idx + 2); + if (codePoint >= MIN_HIGH_SURROGATE && codePoint <= MAX_LOW_SURROGATE) { + throw new JsonParsingException("Invalid code point. 
Should be within the range U+0000–U+D777 or U+E000–U+FFFF."); + } + if (codePoint < 0) { + throw new JsonParsingException("Invalid unicode escape sequence."); + } + character = (char) codePoint; + idx += 6; + } else { + character = (char) escape(escapeChar); + idx += 2; + } + } else if (buffer[idx] >= 0) { + // We have an ASCII character + character = (char) buffer[idx]; + idx++; + } else if ((buffer[idx] & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 character + int codePoint = (buffer[idx] & 0b00011111) << 6 | (buffer[idx + 1] & 0b00111111); + character = (char) codePoint; + idx += 2; + } else if ((buffer[idx] & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 character + int codePoint = (buffer[idx] & 0b00001111) << 12 | (buffer[idx + 1] & 0b00111111) << 6 | (buffer[idx + 2] & 0b00111111); + character = (char) codePoint; + idx += 3; + } else { + throw new JsonParsingException("String cannot be deserialized to a char. Expected a single 16-bit code unit character."); + } + if (buffer[idx] != '"') { + throw new JsonParsingException("String cannot be deserialized to a char. 
Expected a single-character string."); + } + return character; } private int parseLowSurrogate(byte[] buffer, int src, int codePoint) { @@ -84,7 +123,7 @@ private int parseLowSurrogate(byte[] buffer, int src, int codePoint) { } } - private int storeCodePointInStringBuffer(int codePoint, int dst) { + private int storeCodePointInStringBuffer(int codePoint, int dst, byte[] stringBuffer) { if (codePoint < 0) { throw new JsonParsingException("Invalid unicode escape sequence."); } @@ -120,8 +159,4 @@ private boolean hasQuoteFirst(long backslashBits, long quoteBits) { private boolean hasBackslash(long backslashBits, long quoteBits) { return ((quoteBits - 1) & backslashBits) != 0; } - - void reset() { - stringBufferIdx = 0; - } } diff --git a/src/main/java/org/simdjson/StructuralIndexer.java b/src/main/java/org/simdjson/StructuralIndexer.java index 43ec952..b2c4cbf 100644 --- a/src/main/java/org/simdjson/StructuralIndexer.java +++ b/src/main/java/org/simdjson/StructuralIndexer.java @@ -85,7 +85,6 @@ private void finishStep(JsonCharacterBlock characters, JsonStringBlock strings, long nonQuoteScalar = scalar & ~strings.quote(); long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar; prevScalar = nonQuoteScalar >>> 63; - // TODO: utf-8 validation long potentialScalarStart = scalar & ~followsNonQuoteScalar; long potentialStructuralStart = characters.op() | potentialScalarStart; bitIndexes.write(blockIndex, prevStructurals); @@ -94,8 +93,7 @@ private void finishStep(JsonCharacterBlock characters, JsonStringBlock strings, } private long lteq(ByteVector chunk0, byte scalar) { - long r = chunk0.compare(UNSIGNED_LE, scalar).toLong(); - return r; + return chunk0.compare(UNSIGNED_LE, scalar).toLong(); } private long lteq(ByteVector chunk0, ByteVector chunk1, byte scalar) { @@ -106,6 +104,7 @@ private long lteq(ByteVector chunk0, ByteVector chunk1, byte scalar) { void finish(int blockIndex) { bitIndexes.write(blockIndex, prevStructurals); + bitIndexes.finish(); 
stringScanner.finish(); if (unescapedCharsError != 0) { diff --git a/src/main/java/org/simdjson/TapeBuilder.java b/src/main/java/org/simdjson/TapeBuilder.java index fc7f87e..3d05783 100644 --- a/src/main/java/org/simdjson/TapeBuilder.java +++ b/src/main/java/org/simdjson/TapeBuilder.java @@ -10,6 +10,7 @@ import static org.simdjson.Tape.ROOT; import static org.simdjson.Tape.START_ARRAY; import static org.simdjson.Tape.START_OBJECT; +import static org.simdjson.Tape.STRING; import static org.simdjson.Tape.TRUE_VALUE; class TapeBuilder { @@ -23,16 +24,18 @@ class TapeBuilder { private final NumberParser numberParser; private final StringParser stringParser; - TapeBuilder(int capacity, int depth, int padding) { + private int stringBufferIdx; + + TapeBuilder(int capacity, int depth, int padding, byte[] stringBuffer) { this.tape = new Tape(capacity); this.openContainers = new OpenContainer[depth]; this.padding = padding; for (int i = 0; i < openContainers.length; i++) { openContainers[i] = new OpenContainer(); } - this.stringBuffer = new byte[capacity]; - this.numberParser = new NumberParser(tape); - this.stringParser = new StringParser(tape, stringBuffer); + this.stringBuffer = stringBuffer; + this.numberParser = new NumberParser(); + this.stringParser = new StringParser(); } void visitDocumentStart() { @@ -55,9 +58,9 @@ void visitEmptyArray() { void visitRootPrimitive(byte[] buffer, int idx, int len) { switch (buffer[idx]) { case '"' -> visitString(buffer, idx); - case 't' -> visitRootTrueAtom(buffer, idx); - case 'f' -> visitRootFalseAtom(buffer, idx); - case 'n' -> visitRootNullAtom(buffer, idx); + case 't' -> visitRootTrueAtom(buffer, idx, len); + case 'f' -> visitRootFalseAtom(buffer, idx, len); + case 'n' -> visitRootNullAtom(buffer, idx, len); case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> visitRootNumber(buffer, idx, len); default -> throw new JsonParsingException("Unrecognized primitive. 
Expected: string, number, 'true', 'false' or 'null'."); } @@ -102,8 +105,9 @@ private void visitTrueAtom(byte[] buffer, int idx) { tape.append(0, TRUE_VALUE); } - private void visitRootTrueAtom(byte[] buffer, int idx) { - if (!isTrue(buffer, idx)) { + private void visitRootTrueAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 4 <= len && isTrue(buffer, idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'."); } tape.append(0, TRUE_VALUE); @@ -124,8 +128,9 @@ private void visitFalseAtom(byte[] buffer, int idx) { tape.append(0, FALSE_VALUE); } - private void visitRootFalseAtom(byte[] buffer, int idx) { - if (!isFalse(buffer, idx)) { + private void visitRootFalseAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 5 <= len && isFalse(buffer, idx) && (idx + 5 == len || isStructuralOrWhitespace(buffer[idx + 5])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'false'."); } tape.append(0, FALSE_VALUE); @@ -147,8 +152,9 @@ private void visitNullAtom(byte[] buffer, int idx) { tape.append(0, NULL_VALUE); } - private void visitRootNullAtom(byte[] buffer, int idx) { - if (!isNull(buffer, idx)) { + private void visitRootNullAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 4 <= len && isNull(buffer, idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". 
Expected 'null'."); } tape.append(0, NULL_VALUE); @@ -166,11 +172,12 @@ void visitKey(byte[] buffer, int idx) { } private void visitString(byte[] buffer, int idx) { - stringParser.parseString(buffer, idx); + tape.append(stringBufferIdx, STRING); + stringBufferIdx = stringParser.parseString(buffer, idx, stringBuffer, stringBufferIdx); } private void visitNumber(byte[] buffer, int idx) { - numberParser.parseNumber(buffer, idx); + numberParser.parseNumber(buffer, idx, tape); } private void visitRootNumber(byte[] buffer, int idx, int len) { @@ -178,7 +185,7 @@ private void visitRootNumber(byte[] buffer, int idx, int len) { byte[] copy = new byte[remainingLen + padding]; System.arraycopy(buffer, idx, copy, 0, remainingLen); Arrays.fill(copy, remainingLen, remainingLen + padding, SPACE); - numberParser.parseNumber(copy, 0); + numberParser.parseNumber(copy, 0, tape); } private void startContainer(int depth) { @@ -202,7 +209,7 @@ private void emptyContainer(char start, char end) { void reset() { tape.reset(); - stringParser.reset(); + stringBufferIdx = 0; } JsonValue createJsonValue(byte[] buffer) { diff --git a/src/main/java/org/simdjson/annotations/JsonFieldName.java b/src/main/java/org/simdjson/annotations/JsonFieldName.java new file mode 100644 index 0000000..04c5530 --- /dev/null +++ b/src/main/java/org/simdjson/annotations/JsonFieldName.java @@ -0,0 +1,13 @@ +package org.simdjson.annotations; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Target({ElementType.ANNOTATION_TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER}) +@Retention(RetentionPolicy.RUNTIME) +public @interface JsonFieldName { + + String value() default ""; +} diff --git a/src/test/java/org/simdjson/ArrayParsingTest.java b/src/test/java/org/simdjson/ArrayParsingTest.java new file mode 100644 index 0000000..5481569 --- /dev/null +++ 
b/src/test/java/org/simdjson/ArrayParsingTest.java @@ -0,0 +1,245 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static org.assertj.core.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; + +public class ArrayParsingTest { + + @Test + public void emptyArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + fail("Unexpected value"); + it.next(); + } + } + + @Test + public void arrayIterator() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + int[] expectedValues = new int[]{1, 2, 3}; + int counter = 0; + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + JsonValue element = it.next(); + assertThat(element.isLong()).isTrue(); + assertThat(element.asLong()).isEqualTo(expectedValues[counter]); + counter++; + } + assertThat(counter).isEqualTo(expectedValues.length); + } + + @Test + public void arraySize() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + assertThat(jsonValue.getSize()).isEqualTo(3); + } + + @Test + public void largeArraySize() 
{ + // given + SimdJsonParser parser = new SimdJsonParser(); + int realArraySize = 0xFFFFFF + 1; + byte[] json = new byte[realArraySize * 2 - 1 + 2]; + json[0] = '['; + int i = 0; + while (i < realArraySize) { + json[i * 2 + 1] = (byte) '0'; + json[i * 2 + 2] = (byte) ','; + i++; + } + json[json.length - 1] = ']'; + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + assertThat(jsonValue.getSize()).isEqualTo(0xFFFFFF); + } + + @Test + public void missingCommaInArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommas(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"[,", "[1 ", "[,,", "[1,", "[1", "["}) + public void unclosedArray(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. Missing ']' for starting '['."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[[]]", value = "Missing comma between array values"), + @MapEntry(stringKey = "[]", value = "Unclosed array. 
Missing ']' for starting '['.") + }) + public void unclosedArrayDueToPassedLength(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length - 1)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void missingCommaInArrayAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[,", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1 ", value = "Missing comma between array values"), + @MapEntry(stringKey = "[,,", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1,", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1", value = "Missing comma between array values"), + @MapEntry(stringKey = "[", value = "Unrecognized primitive. 
Expected: string, number, 'true', 'false' or 'null'.") + }) + public void unclosedArrayAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void noMoreElements() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + JsonValue jsonValue = parser.parse(json, json.length); + Iterator it = jsonValue.arrayIterator(); + it.next(); + it.next(); + it.next(); + + // when + NoSuchElementException ex = assertThrows(NoSuchElementException.class, it::next); + + // then + assertThat(ex) + .hasMessage("No more elements"); + } + + @Test + public void unclosedArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[[["); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 2)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. 
Missing ']' for starting '['."); + } + + @Test + public void validArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[][[[["); + + // when + JsonValue jsonValue = parser.parse(json, 2); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + fail("Unexpected value"); + it.next(); + } + } +} diff --git a/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java b/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java new file mode 100644 index 0000000..e743b87 --- /dev/null +++ b/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java @@ -0,0 +1,503 @@ +package org.simdjson; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.ClassWithIntegerField; +import org.simdjson.schemas.RecordWithBooleanListField; +import org.simdjson.schemas.RecordWithIntegerListField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerArrayField; +import org.simdjson.schemas.RecordWithStringArrayField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.lang.reflect.Array; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; + +public class ArraySchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(classes = { + Object[].class, + String[].class, + char[].class, + Character[].class, + byte[].class, + Byte[].class, + short[].class, + Short[].class, + int[].class, 
+ Integer[].class, + long[].class, + Long[].class, + boolean[].class, + Boolean[].class, + float[].class, + Float[].class, + double[].class, + Double[].class, + ClassWithIntegerField[].class + }) + public void emptyArrayAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[]"); + + // when + Object array = parser.parse(json, json.length, expectedType); + + // then + assertThat(array).isInstanceOf(expectedType); + assertThat(array.getClass().isArray()).isTrue(); + Assertions.assertThat(Array.getLength(array)).isEqualTo(0); + } + + @Test + public void objectWithEmptyArrayField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": []}"); + + // when + RecordWithStringArrayField object = parser.parse(json, json.length, RecordWithStringArrayField.class); + + // then + assertThat(object.field()).isEmpty(); + } + + @ParameterizedTest + @ValueSource(strings = {"1", "true", "false", "{}", ":", ",", "\"abc\""}) + public void invalidTypeAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Expected '[' but got: '" + jsonStr.charAt(0) + "'."); + } + + @Test + public void missingCommaInArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInArrayAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // 
when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(strings = {"[,", "[1 ", "[,,", "[1,", "[1", "["}) + public void unclosedArrayAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. Missing ']' for starting '['."); + } + + @Test + public void unclosedArrayDueToPassedLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[]]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, int[][].class)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @Test + public void unclosedArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[[["); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 2, int[].class)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. 
Missing ']' for starting '['."); + } + + @Test + public void validArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[][[[["); + + // when + int[] array = parser.parse(json, 2, int[].class); + + // then + assertThat(array).isEmpty(); + } + + @Test + public void missingCommaInArrayAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @Test + public void missingCommaInListAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInArrayAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInListAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"field\": [,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1 }", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [,,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1}", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"ignore\": [1, \"field\": []}", value = "Expected ',' but reached end of buffer."), + @MapEntry(stringKey = "{\"ignore\": [", value = "Unclosed object. Missing '}' for starting '{'.") + }) + public void unclosedArrayAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"field\": [,}", value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1 }", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [,,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1}", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"ignore\": [1, \"field\": []}", value = "Expected ',' but reached end of buffer."), + @MapEntry(stringKey = "{\"ignore\": [", value = "Unclosed object. Missing '}' for starting '{'.") + }) + public void unclosedListAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = {AbstractList.class, LinkedList.class, ArrayList.class, Set.class}) + public void unsupportedTypeForArrays(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Unsupported class: " + expectedType.getName() + + ". For JSON arrays at the root, use Java arrays. 
For inner JSON arrays, use either Java arrays or java.util.List."); + } + + @Test + public void listsAtRootAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, List.class)); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = int[][].class, nulls = false) + public void multidimensionalArrays2d(String jsonStr, int[][] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + int[][] array = parser.parse(json, json.length, int[][].class); + + // then + assertThat(array) + .isDeepEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = int[][][].class, nulls = false) + public void multidimensionalArrays3d(String jsonStr, int[][][] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + int[][][] array = parser.parse(json, json.length, int[][][].class); + + // then + assertThat(array) + .isDeepEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWith2dIntegerListField.class, nulls = false) + public void multidimensionalArrays2dAsList(String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWith2dIntegerListField object = parser.parse(json, json.length, RecordWith2dIntegerListField.class); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWith3dIntegerListField.class, nulls = false) + public void multidimensionalArrays3dAsList(String jsonStr, Object expected) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWith3dIntegerListField object = parser.parse(json, json.length, RecordWith3dIntegerListField.class); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @Test + public void nullAtRootWhenArrayIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + int[] object = parser.parse(json, json.length, int[].class); + + // then + assertThat(object).isNull(); + } + + @Test + public void nullAtObjectFieldWhenArrayIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithPrimitiveIntegerArrayField object = parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.field()).isNull(); + } + + @Test + public void nullAtObjectFieldWhenListIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[],", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3],", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3][]", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!"), + @MapEntry(stringKey = "[1, 2, 3]{}", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3]1", value = "Unclosed array. 
Missing ']' for starting '['."), + @MapEntry(stringKey = "null,", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void moreValuesThanOneArrayAtRoot(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void arraysOfListsAreUnsupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[1, 2], [1], [12, 13]]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, List[].class)); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, Boolean[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + private record RecordWith2dIntegerListField(List> field) { + + } + + private record RecordWith3dIntegerListField(List>> field) { + + } +} diff --git a/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java b/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java index d6deecf..06a1719 100644 --- a/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java +++ b/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import static org.assertj.core.api.Assertions.assertThat; @@ -20,22 +21,48 @@ public class BenchmarkCorrectnessTest { public void countUniqueTwitterUsersWithDefaultProfile() throws IOException { // given SimdJsonParser parser = new SimdJsonParser(); - Set defaultUsers = new HashSet<>(); byte[] json = loadTestFile("/twitter.json"); - // when - JsonValue simdJsonValue = parser.parse(json, json.length); - Iterator tweets = simdJsonValue.get("statuses").arrayIterator(); - while (tweets.hasNext()) { - JsonValue tweet = tweets.next(); - JsonValue user = tweet.get("user"); - if (user.get("default_profile").asBoolean()) { - defaultUsers.add(user.get("screen_name").asString()); + for (int i = 0; i < 10; i++) { + Set defaultUsers = new HashSet<>(); + + // when + JsonValue simdJsonValue = parser.parse(json, json.length); + Iterator tweets = simdJsonValue.get("statuses").arrayIterator(); + while (tweets.hasNext()) { + JsonValue tweet = tweets.next(); + JsonValue user = tweet.get("user"); + if (user.get("default_profile").asBoolean()) { + defaultUsers.add(user.get("screen_name").asString()); + } } + + // then + assertThat(defaultUsers.size()).isEqualTo(86); } + } - // then - assertThat(defaultUsers.size()).isEqualTo(86); + @Test + public void schemaBasedCountUniqueTwitterUsersWithDefaultProfile() throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = 
loadTestFile("/twitter.json"); + + for (int i = 0; i < 10; i++) { + Set defaultUsers = new HashSet<>(); + + // when + Statuses statuses = parser.parse(json, json.length, Statuses.class); + for (var status : statuses.statuses()) { + User user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + + // then + assertThat(defaultUsers.size()).isEqualTo(86); + } } @ParameterizedTest @@ -46,13 +73,25 @@ public void countUniqueTwitterUsersWithDefaultProfile() throws IOException { public void numberParserTest(String input, Double expected) { // given Tape tape = new Tape(100); - NumberParser numberParser = new NumberParser(tape); + NumberParser numberParser = new NumberParser(); byte[] numberUtf8Bytes = toUtf8(padWithSpaces(input)); // when - numberParser.parseNumber(numberUtf8Bytes, 0); + numberParser.parseNumber(numberUtf8Bytes, 0, tape); // then assertThat(tape.getDouble(0)).isEqualTo(expected); } + + record User(boolean default_profile, String screen_name) { + + } + + record Status(User user) { + + } + + record Statuses(List statuses) { + + } } diff --git a/src/test/java/org/simdjson/BooleanParsingTest.java b/src/test/java/org/simdjson/BooleanParsingTest.java new file mode 100644 index 0000000..37979c5 --- /dev/null +++ b/src/test/java/org/simdjson/BooleanParsingTest.java @@ -0,0 +1,121 @@ +package org.simdjson; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; + +public class BooleanParsingTest { + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValuesAtRoot(boolean booleanVal) { + // given + 
SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,"}) + public void moreThanBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"fals", "falsee", "[f]", "{\"a\":f}"}) + public void invalidFalse(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('f') + ". Expected 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"tru", "truee", "[t]", "{\"a\":t}"}) + public void invalidTrue(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('t') + ". 
Expected 'true'."); + } + + @Test + public void arrayOfBooleans() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + Assertions.assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo(true); + assertThat(it.next()).isEqualTo(false); + Assertions.assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanTrueLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("true")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'true'."); + } + + @Test + public void passedLengthSmallerThanFalseLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("false")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 4)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'false'."); + } +} diff --git a/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java b/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java new file mode 100644 index 0000000..353a73f --- /dev/null +++ b/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java @@ -0,0 +1,593 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.RecordWithBooleanArrayField; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithBooleanListField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanArrayField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; + +public class BooleanSchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValueAtRoot(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + Boolean booleanValue = parser.parse(json, json.length, Boolean.class); + + // then + assertThat(booleanValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void primitiveBooleanValueAtRoot(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + 
boolean booleanValue = parser.parse(json, json.length, boolean.class); + + // then + assertThat(booleanValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValueAtObjectField(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + booleanVal + "}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void primitiveBooleanValueAtObjectField(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + booleanVal + "}"); + + // when + RecordWithPrimitiveBooleanField object = parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class); + + // then + assertThat(object.field()).isEqualTo(booleanVal); + } + + @Test + public void nullAtRootWhenBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Boolean booleanValue = parser.parse(json, json.length, Boolean.class); + + // then + assertThat(booleanValue).isNull(); + } + + @Test + public void nullAtRootWhenPrimitiveBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\"", "1"}) + public void invalidTypeForBoolean(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true', 'false' or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\"", "1"}) + public void invalidTypeForPrimitiveBoolean(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true' or 'false'."); + } + + @Test + public void nullAtObjectFieldWhenBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void nullAtObjectFieldWhenPrimitiveBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class) + ); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,"}) + public void moreValuesThanOnePrimitiveBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,", "null,"}) + public void moreValuesThanOneBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "truee", value = "true"), + @MapEntry(stringKey = "falsee", value = "false"), + @MapEntry(stringKey = "nul", value = "null"), + @MapEntry(stringKey = "nulll", value = "null"), + @MapEntry(stringKey = "nuul", value = "null") + }) + public void invalidBooleanAtRoot(String actual, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(actual); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected '" + expected + "'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "truee", value = "true"), + @MapEntry(stringKey = "falsee", value = "false") + }) + public void invalidPrimitiveBooleanAtRoot(String actual, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(actual); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected '" + expected + "'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit.") + }) + public void mismatchedTypeForTrueAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("true"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = int.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForFalseAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("false"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. Minus has to be followed by a digit.") + }) + public void mismatchedTypeForTrue(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": true}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForFalse(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": false}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void arrayOfBooleansAtRoot(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Boolean[] array = parser.parse(json, json.length, Boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = true) + public void arrayOfBooleansAndNullsAtRoot(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Boolean[] array = parser.parse(json, json.length, Boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = boolean[].class, nulls = false) + public void arrayOfPrimitiveBooleansAtRoot(String jsonStr, boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + boolean[] array = parser.parse(json, json.length, boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfPrimitiveBooleansAndNullsAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false, null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean[].class)); + + // then + 
assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = boolean[].class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean[].class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void arrayOfBooleansMixedWithOtherTypesAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false, 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithPrimitiveBooleanArrayField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithBooleanArrayField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithBooleanListField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void arrayOfBooleansMixedWithOtherTypesAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [true, false, 1]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = boolean[][].class, value = "Expected '[' but got: 't'."), + @MapEntry(classKey = Boolean[][].class, value = "Expected '[' but got: 't'.") + }) + public void mismatchedTypeForArrayOfBooleansAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = boolean[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForArrayOfBooleansAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [true, false]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void objectWithArrayOfBooleans(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanArrayField object = parser.parse(json, json.length, RecordWithBooleanArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = boolean[].class, nulls = false) + public void objectWithArrayOfPrimitiveBooleans(String jsonStr, boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithPrimitiveBooleanArrayField object = parser.parse(json, json.length, RecordWithPrimitiveBooleanArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void objectWithListOfBooleans(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = 
Boolean[].class, nulls = true) + public void objectWithListOfBooleansAndNulls(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @Test + public void missingBooleanField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingPrimitiveBooleanField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class) + ); + + // then + assertThat(ex.getCause()).isInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void passedLengthSmallerThanTrueLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("true")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + 
.hasMessage("Invalid value starting at 0. Expected 'true'."); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void passedLengthSmallerThanFalseLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("false")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 4, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'false'."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } +} diff --git a/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java b/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java new file mode 100644 index 0000000..75cc6ae --- /dev/null +++ b/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java @@ -0,0 +1,1297 @@ +package org.simdjson; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithByteArrayField; +import org.simdjson.schemas.RecordWithDoubleArrayField; +import org.simdjson.schemas.RecordWithDoubleField; +import org.simdjson.schemas.RecordWithDoubleListField; +import org.simdjson.schemas.RecordWithFloatArrayField; +import org.simdjson.schemas.RecordWithFloatField; +import org.simdjson.schemas.RecordWithFloatListField; +import 
org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleArrayField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleField; +import org.simdjson.schemas.RecordWithPrimitiveFloatArrayField; +import org.simdjson.schemas.RecordWithPrimitiveFloatField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.CartesianTestCsv; +import org.simdjson.testutils.CartesianTestCsvRow; +import org.simdjson.testutils.FloatingPointNumberTestFile; +import org.simdjson.testutils.FloatingPointNumberTestFile.FloatingPointNumberTestCase; +import org.simdjson.testutils.FloatingPointNumberTestFilesSource; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.io.IOException; +import java.math.BigDecimal; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; + +public class FloatingPointNumberSchemaBasedParsingTest { + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float.class, float.class, Double.class, double.class}, nulls = false) + public void floatingPointNumberAtRoot(String numberStr, Class schema, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object number = parser.parse(json, json.length, schema); + + // then + assertThat(number).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {Float.class, Double.class}) + public void nullAtRootWhenFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isNull(); + } + + @ParameterizedTest + 
@ValueSource(classes = {float.class, double.class}) + public void nullAtRootWhenPrimitiveFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatField.class, + RecordWithPrimitiveFloatField.class, + RecordWithDoubleField.class, + RecordWithPrimitiveDoubleField.class + }, + nulls = false + ) + public void floatingPointNumberAtObjectField(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {RecordWithFloatField.class, RecordWithDoubleField.class}) + public void nullAtObjectFieldWhenFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).extracting("field").isNull(); + } + + @ParameterizedTest + @ValueSource(classes = {RecordWithPrimitiveFloatField.class, RecordWithPrimitiveDoubleField.class}) + public void nullAtObjectFieldWhenPrimitiveFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float[].class, float[].class, Double[].class, double[].class}, nulls = false) + public void arrayOfFloatingPointNumbersAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float[].class, Double[].class}, nulls = true) + public void arrayOfFloatingPointNumbersAndNullsAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {float[].class, double[].class}) + public void arrayOfPrimitiveFloatingPointNumbersAndNullsAtRoot(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[-1.1, 1.0, 0.0, null]"); // the trailing null has no primitive representation, so parsing into a primitive array must fail + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatArrayField.class, + RecordWithPrimitiveFloatArrayField.class, + RecordWithDoubleArrayField.class, + RecordWithPrimitiveDoubleArrayField.class + }, + nulls = false + ) + public void objectWithArrayOfFloatingPointNumbers(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatArrayField.class, + RecordWithFloatListField.class, + RecordWithDoubleArrayField.class, + RecordWithDoubleListField.class + }, + nulls = true + ) + public void objectWithArrayOfFloatingPointNumbersWithNulls(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @CartesianTest + public void leadingZerosAreNotAllowed( + @Values(strings = {"01.0", "-01.0", "000.0", "-000.0", "012e34"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Leading zeroes are not allowed."); + } + + @CartesianTest + public void minusHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"-a123.0", "--123.0", "-+123.0", "-.123", "-e123",}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToBeFollowedByStructuralCharacterOrWhitespace( + @Values(strings = {"-1.0-2", "1.0a", "12E12.12", "1e2e3"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); + } + + @CartesianTest + public void decimalPointHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"123.", "1..1", "1.e1", "1.E1"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Decimal point has to be followed by a digit."); + } + + @CartesianTest + public void exponentIndicatorHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"1e+-2", "1E+-2", "1e--23", "1E--23", "1ea", "1Ea", "1e", "1E", "1e+", "1E+"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Exponent indicator has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, Double.class, double.class}) + public void startingWithPlusIsNotAllowed(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("+1.0"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToStartWithMinusOrDigit( + @Values(strings = {"a123", "a-123"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @CartesianTest + public void positiveDoubleZero( + @Values(strings = { + "0.0", + "2251799813685248e-342", + "9999999999999999999e-343", + "1.23e-341", + "123e-343", + "0.0e-999", + "0e9999999999999999999999999999", + "18446744073709551615e-343", + "0.099999999999999999999e-323", + "0.99999999999999999999e-324", + "0.9999999999999999999e-324" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0.0d); + } + + @CartesianTest + public void negativeDoubleZero( + @Values(strings = { + "-0.0", + "-2251799813685248e-342", + "-9999999999999999999e-343", + "-1.23e-341", + "-123e-343", + "-0.0e-999", + "-0e9999999999999999999999999999", + "-18446744073709551615e-343", + "-0.099999999999999999999e-323", + "-0.99999999999999999999e-324", + "-0.9999999999999999999e-324" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(-0.0d); + } + + @CartesianTest + public void positiveFloatZero( + @Values(strings = { + "0.0", + "1e-58", + "1e-64", + "0.0e-999", + "0e9999999999999999999999999999", + "18446744073709551615e-66", + "0.99999999999999999999e-46" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0.0f); + } + + @CartesianTest + public void negativeFloatZero( + @Values(strings = { + "-0.0", + 
"-1e-58", + "-1e-64", + "-0.0e-999", + "-0e9999999999999999999999999999", + "-18446744073709551615e-66", + "-0.99999999999999999999e-46" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(-0.0f); + } + + @CartesianTest + public void exactDouble( + @CartesianTestCsv({ + "9007199254740991.0, 9007199254740991", + "9007199254740992.0, 9007199254740992", + "18014398509481988.0, 18014398509481988" + }) CartesianTestCsvRow row, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValueAsDouble(1)); + } + + @CartesianTest + public void exactFloat( + @CartesianTestCsv({ + "16777215.0, 16777215", + "16777216.0, 16777216", + "33554436.0, 33554436" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValueAsFloat(1)); + } + + @CartesianTest + public void minNormalDouble( + @Values(strings = { + "2.2250738585072016e-308", + "2.2250738585072015e-308", + "2.2250738585072014e-308", + "2.2250738585072013e-308", + "2.2250738585072012e-308" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + 
assertThat(value).isEqualTo(0x1.0p-1022d); + } + + @CartesianTest + public void minNormalFloat( + @Values(strings = { + "1.17549433E-38", + "1.17549434E-38", + "1.17549435E-38", + "1.17549436E-38", + "1.17549437E-38" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p-126f); + } + + @CartesianTest + public void maxSubnormalDouble( + @Values(strings = { + "2.2250738585072011e-308", + "2.2250738585072010e-308", + "2.2250738585072009e-308", + "2.2250738585072008e-308", + "2.2250738585072007e-308", + "0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] 
json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.fffffffffffffp-1022d); + } + + @CartesianTest + public void maxSubnormalFloat( + @Values(strings = { + "1.1754942e-38", + "0.0000000000000000000000000000000000000117549421069244107548702944485", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.fffffep-126f); + } + + @CartesianTest + public void minSubnormalDouble( + @Values(strings = { + "3e-324", + "4.9e-324", + "4.9406564584124654e-324", + "4.94065645841246544176568792868e-324", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.0000000000001p-1022d); + } + + @CartesianTest + public void minSubnormalFloat( + @Values(strings = { + "1e-45", + "1.4e-45", + "1.4012984643248170e-45", + "1.40129846432481707092372958329e-45", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.000002p-126f); + } + + @CartesianTest + public void maxDouble( + @Values(strings = { + "1.7976931348623157e308", + "1.7976931348623158e308", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = 
parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.fffffffffffffp+1023d); + } + + @CartesianTest + public void maxFloat( + @Values(strings = { + "3.4028234664e38", + "3.4028234665e38", + "3.4028234666e38", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.fffffep+127f); + } + + @CartesianTest + public void positiveDoubleInfinity( + @Values(strings = { + "1.9e308", + "1.8e308", + "1234456789012345678901234567890e9999999999999999999999999999", + "1.832312213213213232132132143451234453123412321321312e308", + "2139879401095466344511101915470454744.9813888656856943E+272", + "2e30000000000000000", + "2e3000", + "1234456789012345678901234567890e999999999999999999999999999", + "1.7976931348623159e308", + 
"1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + 
}) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.POSITIVE_INFINITY); + } + + @CartesianTest + public void negativeDoubleInfinity( + @Values(strings = { + "-1.9e308", + "-1.8e308", + "-1234456789012345678901234567890e9999999999999999999999999999", + "-1.832312213213213232132132143451234453123412321321312e308", + "-2139879401095466344511101915470454744.9813888656856943E+272", + "-2e30000000000000000", + "-2e3000", + "-1234456789012345678901234567890e999999999999999999999999999", + "-1.7976931348623159e308", + "-14384566631413902735261182076422355811832278452463312311626366537903681520913941969303658286346876379481579407765991827913875271353530347383571341103106094556939008241935497727920165431826805197405803543654679854401835987013122576245455623313970183299286131961255902741877200739148180625308303165331580986249841188892982813718122887895373105990375291134154387389548947521247249830672411087644883464543766990186730784047511214148049372242408059931238169323262236830907705615975704577939329858261626042558845291341263962822021265262533893834218067279545885255961143798012690940963298050548030892997369968709512585730108774044074519538466986091982139268826920785570332282652593054811985260598131644691875866932573357795220204076454986842633399219052275566166981299674128912822316855046606712779271982900098246801863197509786657345766837842558022697089173617194660431752011588490978813704771118501715798690560160616661730290595884337760156444397050503775542776961439282780934537928038462527159660167332226464423828921239400524413468224297215938843782125587010043569242430300595174893466465777246224989197525973820952225003111241818235122510713561817693765776513900282977961562088153750891591283949457105158613344862671017974971111
25909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.NEGATIVE_INFINITY); + } + + @CartesianTest + public void positiveFloatInfinity( + @Values(strings = { + "1.9e39", + "1.8e39", + "1.9e40", + "1.8e40", + "1234456789012345678901234567890e9999999999999999999999999999", + "3.532312213213213232132132143451234453123412321321312e38", + "2139879401095466344511101915470454744.9813888656856943E+3", + "2e30000000000000000", + "2e3000", + "3.4028236e38", + 
"1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + 
}) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.POSITIVE_INFINITY); + } + + @CartesianTest + public void negativeFloatInfinity( + @Values(strings = { + "-1.9e39", + "-1.8e39", + "-1.9e40", + "-1.8e40", + "-1234456789012345678901234567890e9999999999999999999999999999", + "-3.532312213213213232132132143451234453123412321321312e38", + "-2139879401095466344511101915470454744.9813888656856943E+3", + "-2e30000000000000000", + "-2e3000", + "-3.4028236e38", + "-1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998
149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.NEGATIVE_INFINITY); + } + + @CartesianTest + public void roundingOverflowForDouble( + @Values(strings = { + // In this case the binary significand after rounding up is equal to 9007199254740992 (2^53), + // which is more than we can store (2^53 - 1). + "7.2057594037927933e16", + "72057594037927933.0000000000000000", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p+56d); + } + + @CartesianTest + public void roundingOverflowForFloat( + @Values(strings = { + // In this case the binary significand after rounding up is equal to 16777216 (2^24), + // which is more than we can store (2^24 - 1). 
+ "7.2057594e16", + "72057594000000000.0000000", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p+56f); + } + + @CartesianTest + public void exponentWithMoreDigitsThanLongCanAccommodateAndLeadingZeros( + @CartesianTestCsv({ + "1e000000000000000000001, 10.0", + "1e-000000000000000000001, 0.1" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValue(1, expectedType)); + } + + @CartesianTest + public void exponentWithMoreDigitsThanLongCanAccommodate( + @CartesianTestCsv({ + "0e999999999999999999999, 0.0", + "0e-999999999999999999999, 0.0", + "1e999999999999999999999, Infinity", + "1e-999999999999999999999, 0.0", + "9999999999999999999999999999999999999999e-999999999999999999999, 0.0", + "0.9999999999999999999999999999999999999999e999999999999999999999, Infinity" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValue(1, expectedType)); + } + + @CartesianTest + public void doubleRoundTiesToEven( + @Values(strings = { + "2251799813685803.75", + "4503599627370497.5", + "4503599627475353.5", + "9007199254740993.0", + "4503599627370496.5", + "4503599627475352.5", + "2251799813685248.25", + 
"2.22507385850720212418870147920222032907240528279439037814303133837435107319244194686754406432563881851382188218502438069999947733013005649884107791928741341929297200970481951993067993290969042784064731682041565926728632933630474670123316852983422152744517260835859654566319282835244787787799894310779783833699159288594555213714181128458251145584319223079897504395086859412457230891738946169368372321191373658977977723286698840356390251044443035457396733706583981055420456693824658413747607155981176573877626747665912387199931904006317334709003012790188175203447190250028061277777916798391090578584006464715943810511489154282775041174682194133952466682503431306181587829379004205392375072083366693241580002758391118854188641513168478436313080237596295773983001708984375e-308", + "1125899906842624.125", + "1125899906842901.875", + "9007199254740993.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + /** + * Every input lies strictly above the midpoint between two adjacent doubles, so the nearest + * double is the neighbour with the larger magnitude. These inputs are distinct from the ones + * used by doubleRoundDownToNearest. The parsed value must equal + * {@link Double#parseDouble(String)} for both the boxed and the primitive target type. + */ + @CartesianTest + public void doubleRoundUpToNearest( + @Values(strings = { + "2251799813685803.85", + "4503599627370497.6", + "45035996.273704995", + "4503599627475353.6", + "1.797693134862315700000000000000001e308", + "2402844368454405395.2", + "-2402844368454405395.2", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + @CartesianTest + public void doubleRoundDownToNearest( + @Values(strings = { + "2251799813685803.15", + "4503599627370497.2", + "45035996.273704985", + "4503599627475353.2", + 
"9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709", + "1.0000000000000006661338147750939242541790008544921875", + "-92666518056446206563e3", + "90054602635948575728e72", + "7.0420557077594588669468784357561207962098443483187940792729600000e59", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + @CartesianTest + public void floatRoundTiesToEven( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125e-38", + "30219.0830078125", + "16252921.5", + "5322519.25", + "3900245.875", + "1510988.3125", + "782262.28125", + "328381.484375", + "156782.0703125", + "85003.24609375", + "17419.6494140625", + "15498.36376953125", + "6318.580322265625", + "2525.2840576171875", + "16407.9462890625", + "8388614.5" + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + 
// when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + @CartesianTest + public void floatRoundUpToNearest( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", + "1.1754943508e-38", + "16252921.5", + "3900245.875", + "328381.484375", + "85003.24609375", + "2525.2840576171875", + "936.3702087402344", + "411.88682556152344", + "206.50310516357422", + "124.16878890991211", + "50.811574935913086", + "13.91745138168335", + "2.687217116355896", + "1.1877630352973938", + "0.09289376810193062", + "0.03706067614257336", + "0.028068351559340954", + "0.012114629615098238", + "0.004221370676532388", + "0.002153817447833717", + "0.0015924838953651488", + "0.00036393293703440577", + "1.1754947011469036e-38", + "7.0064923216240854e-46", + "4.7019774032891500318749461488889827112746622270883500860350068251e-38", + "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679", + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + /** + * Checks that every input parses to the same value as {@link Float#parseFloat(String)}. + * Runs for both the boxed and the primitive float target type, like the other float + * rounding tests in this class. + */ + @CartesianTest + public void floatRoundDownToNearest( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", + "30219.0830078125", + "5322519.25", + "1510988.3125", + "782262.28125", + "156782.0703125", + "17419.6494140625", + "15498.36376953125", + "6318.580322265625", + "1370.9265747070312", + "17.486443519592285", + "7.5464513301849365", + "0.7622503340244293", + "0.30531780421733856", + "0.21791061013936996", + "0.0008602388261351734", + 
"0.00013746770127909258", + "16407.9462890625", + "8388614.5", + "2.3509887016445750159374730744444913556373311135441750430175034126e-38", + "3.4028234664e38", + "3.4028234665e38", + "3.4028234666e38", + "0.000000000000000000000000000000000000011754943508222875079687365372222456778186655567720875215087517062784172594547271728515625", + "0.00000000000000000000000000000000000000000000140129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125", + "0.00000000000000000000000000000000000002350988561514728583455765982071533026645717985517980855365926236850006129930346077117064851336181163787841796875", + "0.00000000000000000000000000000000000001175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875", + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + @CartesianTest + public void moreValuesThanOneFloatingPointNumberAtRoot( + @Values(strings = {"123.0,", "123.0{}", "1.0:"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigDecimal.class, value = "Class: java.math.BigDecimal has more than one constructor."), + @MapEntry(classKey = Number.class, value = "Unsupported class: java.lang.Number. 
Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForFloatingPointNumberAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("123.0"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithByteArrayField.class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForFloatingPointNumberAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 123.0}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = {float[].class, Float[].class, double[].class, Double[].class}) + public void arrayOfFloatingPointNumbersMixedWithOtherTypesAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1.0, -1.0, true]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithFloatField.class, + RecordWithPrimitiveFloatField.class, + RecordWithDoubleField.class, + RecordWithPrimitiveDoubleField.class + }) + public void arrayOfFloatingPointNumbersMixedWithOtherTypesAtObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1.0, -1.0, true]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigDecimal[].class, value = "Class: java.math.BigDecimal has more than one constructor."), + @MapEntry(classKey = Number[].class, value = "Unsupported class: java.lang.Number. 
Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte[][].class, value = "Expected '[' but got: '1'."), + @MapEntry(classKey = Byte[][].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForArrayOfFloatingPointNumbersAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1.0, -1.0, 0]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForArrayOfFloatingPointNumbersAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1.0, -1.0, 0.0]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForPrimitiveDouble(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + double value = parser.parse(json, json.length, double.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForDouble(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + Double value = parser.parse(json, json.length, Double.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%nexpected: %s%nwas: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForPrimitiveFloat(FloatingPointNumberTestFile 
file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + float value = parser.parse(json, json.length, float.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedFloat(), value) + .isEqualTo(testCase.expectedFloat()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForFloat(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + Float value = parser.parse(json, json.length, Float.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%nexpected: %s%nwas: %s", testCase.line(), testCase.expectedFloat(), value) + .isEqualTo(testCase.expectedFloat()); + } + } + } + + @CartesianTest + public void integralNumberAsFloatingPointNumber( + @Values(strings = {"123", "0", "-123"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid floating-point number. 
Fraction or exponent part is missing."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, double.class, Double.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {Float.class, Double.class}) + public void passedLengthSmallerThanNullLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, double.class, Double.class}) + public void passedLengthSmallerThanNumberLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("1.234")); + + // when + Object value = parser.parse(json, 3, expectedType); + + // then + assertThat(value.toString()).isEqualTo("1.2"); + } +} diff --git a/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java b/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java new file mode 100644 index 0000000..041c725 --- /dev/null +++ b/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java @@ -0,0 +1,779 @@ +package org.simdjson; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; +import 
org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithByteArrayField; +import org.simdjson.schemas.RecordWithByteField; +import org.simdjson.schemas.RecordWithByteListField; +import org.simdjson.schemas.RecordWithIntegerArrayField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithIntegerListField; +import org.simdjson.schemas.RecordWithLongArrayField; +import org.simdjson.schemas.RecordWithLongField; +import org.simdjson.schemas.RecordWithLongListField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveByteArrayField; +import org.simdjson.schemas.RecordWithPrimitiveByteField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerArrayField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveLongArrayField; +import org.simdjson.schemas.RecordWithPrimitiveLongField; +import org.simdjson.schemas.RecordWithPrimitiveShortArrayField; +import org.simdjson.schemas.RecordWithPrimitiveShortField; +import org.simdjson.schemas.RecordWithShortArrayField; +import org.simdjson.schemas.RecordWithShortField; +import org.simdjson.schemas.RecordWithShortListField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.RandomIntegralNumberSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.math.BigInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; + +public class IntegralNumberSchemaBasedParsingTest { + + @ParameterizedTest + @RandomIntegralNumberSource( + classes = { + Byte.class, + byte.class, + Short.class, + short.class, + Integer.class, + int.class, + Long.class, + 
long.class + }, + includeMinMax = true + ) + public void integralNumberAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {Byte.class, Short.class, Integer.class, Long.class}) + public void nullAtRootWhenIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isNull(); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, short.class, int.class, long.class}) + public void nullAtRootWhenPrimitiveIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @RandomIntegralNumberSource( + classes = { + RecordWithByteField.class, + RecordWithPrimitiveByteField.class, + RecordWithShortField.class, + RecordWithPrimitiveShortField.class, + RecordWithIntegerField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithLongField.class, + RecordWithPrimitiveLongField.class + }, + includeMinMax = true + ) + public void integralNumberAtObjectField(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithByteField.class, + RecordWithShortField.class, + RecordWithIntegerField.class, + RecordWithLongField.class + }) + public void nullAtObjectFieldWhenIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).extracting("field").isNull(); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithPrimitiveByteField.class, + RecordWithPrimitiveShortField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithPrimitiveLongField.class + }) + public void nullAtObjectFieldWhenPrimitiveIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + Byte[].class, + byte[].class, + Short[].class, + short[].class, + Integer[].class, + int[].class, + Long[].class, + long[].class + }, + nulls = false + ) + public void arrayOfIntegralNumbersAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + Byte[].class, + Short[].class, + Integer[].class, + Long[].class + }, + nulls = true + ) + public void arrayOfIntegralNumbersAndNullsAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + /** + * A JSON array that contains null must be rejected when the target is an array of primitive + * integral numbers. The schemas are primitive array types so that the array itself matches + * the schema and the null element is what the parser has to reject. + */ + @ParameterizedTest + @ValueSource(classes = {byte[].class, short[].class, int[].class, long[].class}) + public void arrayOfPrimitiveIntegralNumbersAndNullsAtRoot(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[-128, 1, 127, null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithByteArrayField.class, + RecordWithPrimitiveByteArrayField.class, + RecordWithShortArrayField.class, + RecordWithPrimitiveShortArrayField.class, + RecordWithIntegerArrayField.class, + RecordWithPrimitiveIntegerArrayField.class, + RecordWithLongArrayField.class, + RecordWithPrimitiveLongArrayField.class + }, + nulls = false + ) + public void objectWithArrayOfIntegralNumbers(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithByteArrayField.class, + RecordWithByteListField.class, + RecordWithShortArrayField.class, + RecordWithShortListField.class, + RecordWithIntegerArrayField.class, + RecordWithIntegerListField.class, + RecordWithLongArrayField.class, + RecordWithLongListField.class + }, + nulls = true + ) + public void objectWithArrayOfIntegralNumbersWithNulls(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @CartesianTest + public void outOfPrimitiveByteRange( + @Values(classes = {byte.class, Byte.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-129", + "128", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + 
assertThat(ex) + .hasMessage("Number value is out of byte range ([-128, 127])."); + } + + @CartesianTest + public void outOfPrimitiveShortRange( + @Values(classes = {short.class, Short.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-32769", + "32768", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of short range ([-32768, 32767])."); + } + + @CartesianTest + public void outOfPrimitiveIntegerRange( + @Values(classes = {int.class, Integer.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-2147483649", + "2147483648", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of int range ([-2147483648, 2147483647])."); + } + + @CartesianTest + public void outOfPrimitiveLongRange( + @Values(classes = {long.class, Long.class}) Class expectedType, + @Values(strings = { + "9223372036854775808", + "9999999999999999999", + "10000000000000000000", + "-9223372036854775809", + "-9999999999999999999", + "-10000000000000000000" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); + } + + @CartesianTest + public void leadingZerosAreNotAllowed( + 
@Values(strings = {"01", "-01", "000", "-000"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Leading zeroes are not allowed."); + } + + @CartesianTest + public void minusHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"-a123", "--123", "-+123"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToBeFollowedByStructuralCharacterOrWhitespace( + @Values(strings = {"-1-2", "1a"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); + } + + @CartesianTest + public void moreValuesThanOneIntegralNumberAtRoot( + @Values(strings = {"123,", "123{}", "1:"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @CartesianTest + public void floatingPointNumberAsIntegralNumber( + @Values(strings = {"1.0", "-1.0", "1e1", "1.9e1"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or 
whitespace."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigInteger.class, value = "Class: java.math.BigInteger has more than one constructor."), + @MapEntry(classKey = Number.class, value = "Unsupported class: java.lang.Number. Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForIntegralNumberAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("123"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithByteArrayField.class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForIntegralNumberAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 123}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = { + byte[].class, + Byte[].class, + short[].class, + Short[].class, + int[].class, + Integer[].class, + long[].class, + Long[].class + }) + public void arrayOfIntegralNumbersMixedWithOtherTypesAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, -1, true]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithByteArrayField.class, + RecordWithPrimitiveByteArrayField.class, + RecordWithByteListField.class, + RecordWithShortArrayField.class, + RecordWithPrimitiveShortArrayField.class, + RecordWithShortListField.class, + RecordWithIntegerArrayField.class, + RecordWithPrimitiveIntegerArrayField.class, + RecordWithIntegerListField.class, + RecordWithLongArrayField.class, + RecordWithPrimitiveLongArrayField.class, + RecordWithLongListField.class + }) + public void arrayOfIntegralNumbersMixedWithOtherTypesAtObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, -1, true]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigInteger[].class, value = "Class: java.math.BigInteger has more than one constructor."), + @MapEntry(classKey = Number[].class, value = "Unsupported class: java.lang.Number. Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(classKey = byte[][].class, value = "Expected '[' but got: '1'."), + @MapEntry(classKey = Byte[][].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForArrayOfIntegralNumbersAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, -1, 0]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForArrayOfIntegralNumbersAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, -1, 0]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) + public void startingWithPlusIsNotAllowed(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("+1"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToStartWithMinusOrDigit( + @Values(strings = {"a123", "a-123"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, Byte.class}) + public void minusZeroIsTreatedAsByteZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo((byte) 0); + } + + @ParameterizedTest + @ValueSource(classes = {short.class, Short.class}) + public void minusZeroIsTreatedAsShortZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo((short) 0); + } + + @ParameterizedTest + @ValueSource(classes = {int.class, Integer.class}) + public void minusZeroIsTreatedAsIntegerZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0); + } + + @ParameterizedTest + @ValueSource(classes = {long.class, Long.class}) + public void minusZeroIsTreatedAsLongZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0L); + } + + @ParameterizedTest + @ValueSource(classes = {Byte.class, byte.class, Short.class, short.class, Integer.class, int.class, Long.class, long.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + 
@ParameterizedTest + @ValueSource(classes = {Byte.class, Short.class, Integer.class, Long.class}) + public void passedLengthSmallerThanNullLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, Byte.class, short.class, Short.class, int.class, Integer.class, long.class, Long.class}) + public void passedLengthSmallerThanNumberLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("1234")); + + // when + Object value = parser.parse(json, 2, expectedType); + + // then + assertThat(value.toString()).isEqualTo("12"); + } +} diff --git a/src/test/java/org/simdjson/NullParsingTest.java b/src/test/java/org/simdjson/NullParsingTest.java new file mode 100644 index 0000000..cc2cbc4 --- /dev/null +++ b/src/test/java/org/simdjson/NullParsingTest.java @@ -0,0 +1,106 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Iterator; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; + +public class NullParsingTest { + + @Test + public void nullValueAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isNull()).isTrue(); + } + + @ParameterizedTest + @ValueSource(strings = {"[n]", "{\"a\":n}"}) + public void 
invalidNull(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('n') + ". Expected 'null'."); + } + + @Test + public void moreThanNullAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null,"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"nulll", "nul"}) + public void invalidNullAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + @Test + public void arrayOfNulls() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[null, null, null]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator<JsonValue> it = jsonValue.arrayIterator(); + for (int i = 0; i < 3; i++) { + assertThat(it.hasNext()).isTrue(); + JsonValue element = it.next(); + assertThat(element.isNull()).isTrue(); + } + assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } +} diff --git a/src/test/java/org/simdjson/NumberParsingTest.java b/src/test/java/org/simdjson/NumberParsingTest.java index e92aca3..1599add 100644 --- a/src/test/java/org/simdjson/NumberParsingTest.java +++ b/src/test/java/org/simdjson/NumberParsingTest.java @@ -1,26 +1,68 @@ package org.simdjson; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.testutils.FloatingPointNumberTestFile; +import org.simdjson.testutils.FloatingPointNumberTestFile.FloatingPointNumberTestCase; +import org.simdjson.testutils.FloatingPointNumberTestFilesSource; +import org.simdjson.testutils.RandomIntegralNumberSource; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.stream.Stream; +import java.util.Iterator; -import 
static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; +import static org.simdjson.TestUtils.padWithSpaces; import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; public class NumberParsingTest { + @ParameterizedTest + @RandomIntegralNumberSource(classes = long.class, includeMinMax = true) + public void longAtRoot(String longStr, long expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(longStr); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"1.1", "-1.1", "1e1", "1E1", "-1e1", "-1E1", "1e-1", "1E-1", "1.1e1", "1.1E1"}) + public void doubleAtRoot(String doubleStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(doubleStr); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(Double.parseDouble(doubleStr)); + } + + @ParameterizedTest + @ValueSource(strings = {"1,", "1.1,"}) + public void invalidNumbersAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + @ParameterizedTest @ValueSource(strings = { "123.", @@ -37,7 +79,8 @@ public void decimalPointHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. 
Decimal point has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. Decimal point has to be followed by a digit."); } @ParameterizedTest @@ -62,7 +105,8 @@ public void exponentIndicatorHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Exponent indicator has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. Exponent indicator has to be followed by a digit."); } @ParameterizedTest @@ -83,7 +127,8 @@ public void leadingZerosAreNotAllowed(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Leading zeroes are not allowed."); + assertThat(ex) + .hasMessage("Invalid number. Leading zeroes are not allowed."); } @ParameterizedTest @@ -105,7 +150,8 @@ public void minusHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Minus has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); } @ParameterizedTest @@ -125,7 +171,8 @@ public void numberHasToBeFollowedByStructuralCharacterOrWhitespace(String input) JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Number has to be followed by a structural character or whitespace."); + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); } @Test @@ -151,7 +198,8 @@ public void startingWithPlusIsNotAllowed() { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + assertThat(ex) + .hasMessage("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); } @ParameterizedTest @@ -170,7 +218,8 @@ public void numberHasToStartWithMinusOrDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + assertThat(ex) + .hasMessage("Unrecognized primitive. 
Expected: string, number, 'true', 'false' or 'null'."); } @ParameterizedTest @@ -208,7 +257,8 @@ public void outOfRangeLongIsNotAllowed(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); + assertThat(ex) + .hasMessage("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); } @ParameterizedTest @@ -541,50 +591,57 @@ public void exactDouble(String input, double expected) { } @ParameterizedTest - @MethodSource("listTestFiles") - // This test assumes that input files are formatted as described in: https://github.com/nigeltao/parse-number-fxx-test-data - public void testFiles(File file) throws IOException { + @FloatingPointNumberTestFilesSource + public void testFiles(FloatingPointNumberTestFile file) throws IOException { // given SimdJsonParser parser = new SimdJsonParser(); - try (BufferedReader br = new BufferedReader(new FileReader(file))) { - String line; - while ((line = br.readLine()) != null) { - String[] cells = line.split(" "); - Double expected = Double.longBitsToDouble(Long.decode("0x" + cells[2])); - String input = readInputNumber(cells[3]); - byte[] json = toUtf8(input); + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); // when JsonValue value = parser.parse(json, json.length); // then - assertThat(value).isEqualTo(expected); + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); } } } - private static String readInputNumber(String input) { - boolean isDouble = input.indexOf('e') >= 0 || input.indexOf('E') >= 0 || input.indexOf('.') >= 0; - if 
(isDouble) { - if (input.startsWith(".")) { - input = "0" + input; - } - return input.replaceFirst("\\.[eE]", ".0e"); - } - return input + ".0"; + @Test + public void arrayOfNumbers() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[0, 1, -1, 1.1]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + Assertions.assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo(0); + assertThat(it.next()).isEqualTo(1); + assertThat(it.next()).isEqualTo(-1); + assertThat(it.next()).isEqualTo(1.1); + Assertions.assertThat(it.hasNext()).isFalse(); } - private static List listTestFiles() throws IOException { - String testDataDir = System.getProperty("org.simdjson.testdata.dir", System.getProperty("user.dir") + "/testdata"); - File[] testFiles = Path.of(testDataDir, "parse-number-fxx-test-data", "data").toFile().listFiles(); - if (testFiles == null) { - File emptyFile = new File(testDataDir, "empty.txt"); - emptyFile.createNewFile(); - return List.of(emptyFile); - } - return Stream.of(testFiles) - .filter(File::isFile) - .toList(); + @Test + public void passedLengthSmallerThanNumberLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("1234")); + + // when + JsonValue value = parser.parse(json, 2); + + // then + assertThat(value).isEqualTo(12); } } diff --git a/src/test/java/org/simdjson/ObjectParsingTest.java b/src/test/java/org/simdjson/ObjectParsingTest.java index 36cdbbc..3aa94c7 100644 --- a/src/test/java/org/simdjson/ObjectParsingTest.java +++ b/src/test/java/org/simdjson/ObjectParsingTest.java @@ -3,11 +3,12 @@ import org.junit.jupiter.api.Test; import java.util.Iterator; +import java.util.List; import java.util.Map; -import static org.assertj.core.api.Assertions.assertThat; -import static org.simdjson.JsonValueAssert.assertThat; +import static 
org.junit.jupiter.api.Assertions.assertThrows; import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; public class ObjectParsingTest { @@ -22,7 +23,7 @@ public void emptyObject() { // then assertThat(jsonValue.isObject()).isTrue(); - Iterator<JsonValue> it = jsonValue.arrayIterator(); + Iterator<Map.Entry<String, JsonValue>> it = jsonValue.objectIterator(); assertThat(it.hasNext()).isFalse(); } @@ -94,4 +95,69 @@ public void nonexistentField() { assertThat(jsonValue.get("\\u20A9\\u0E3F")).isNull(); assertThat(jsonValue.get("αβ")).isNull(); } + + @Test + public void nullFieldName() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\\null: 1}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Object does not start with a key"); + } + + @Test + public void arrayOfObjects() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[{\"a\": 1}, {\"a\": 2}, {\"a\": 3}]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator<JsonValue> arrayIterator = jsonValue.arrayIterator(); + for (int expectedValue : List.of(1, 2, 3)) { + assertThat(arrayIterator.hasNext()).isTrue(); + JsonValue object = arrayIterator.next(); + assertThat(object.isObject()).isTrue(); + JsonValue field = object.get("a"); + assertThat(field.isLong()).isTrue(); + assertThat(field.asLong()).isEqualTo(expectedValue); + } + assertThat(arrayIterator.hasNext()).isFalse(); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @Test + public void 
unclosedObjectDueToPassedLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"a\":{}}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length - 1)); + + // then + assertThat(ex) + .hasMessage("No comma between object fields"); + } } diff --git a/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java b/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java new file mode 100644 index 0000000..dcb6545 --- /dev/null +++ b/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java @@ -0,0 +1,821 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.annotations.JsonFieldName; +import org.simdjson.schemas.ClassWithIntegerField; +import org.simdjson.schemas.ClassWithPrimitiveBooleanField; +import org.simdjson.schemas.ClassWithPrimitiveByteField; +import org.simdjson.schemas.ClassWithPrimitiveCharacterField; +import org.simdjson.schemas.ClassWithPrimitiveDoubleField; +import org.simdjson.schemas.ClassWithPrimitiveFloatField; +import org.simdjson.schemas.ClassWithPrimitiveIntegerField; +import org.simdjson.schemas.ClassWithPrimitiveLongField; +import org.simdjson.schemas.ClassWithPrimitiveShortField; +import org.simdjson.schemas.ClassWithStringField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveByteField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleField; +import org.simdjson.schemas.RecordWithPrimitiveFloatField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveLongField; +import org.simdjson.schemas.RecordWithPrimitiveShortField; +import 
org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.lang.reflect.InvocationTargetException; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; + +public class ObjectSchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(classes = { + RecordWithIntegerField.class, + ClassWithIntegerField.class, + ClassWithoutExplicitConstructor.class + }) + public void emptyObject(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{}"); + + // when + Object object = parser.parse(json, json.length, expectedType); + + // then + assertThat(object).isNotNull(); + assertThat(object).hasAllNullFieldsOrProperties(); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithPrimitiveByteField.class, + RecordWithPrimitiveShortField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithPrimitiveLongField.class, + RecordWithPrimitiveBooleanField.class, + RecordWithPrimitiveFloatField.class, + RecordWithPrimitiveDoubleField.class, + RecordWithPrimitiveCharacterField.class, + ClassWithPrimitiveByteField.class, + ClassWithPrimitiveShortField.class, + ClassWithPrimitiveIntegerField.class, + ClassWithPrimitiveLongField.class, + ClassWithPrimitiveBooleanField.class, + ClassWithPrimitiveFloatField.class, + ClassWithPrimitiveDoubleField.class, + ClassWithPrimitiveCharacterField.class + }) + public void emptyObjectWhenPrimitiveFieldsAreExpected(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{}"); + + // when + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> parser.parse(json, json.length, 
expectedType)); + + // then + assertThat(ex) + .hasCauseExactlyInstanceOf(NullPointerException.class); + } + + @Test + public void nullAtRootWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + RecordWithPrimitiveByteField object = parser.parse(json, json.length, RecordWithPrimitiveByteField.class); + + // then + assertThat(object).isNull(); + } + + @Test + public void nullAtObjectFieldWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": null}"); + + // when + NestedRecordWithStringField object = parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.nestedField()).isNull(); + } + + @Test + public void recordWithExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4, \"fifth_field\": 5}"); + + // when + RecordWithExplicitFieldNames object = parser.parse(json, json.length, RecordWithExplicitFieldNames.class); + + // then + assertThat(object.firstField()).isEqualTo(1); + assertThat(object.secondField()).isEqualTo(2); + assertThat(object.thirdField()).isEqualTo(3); + assertThat(object.fourthField()).isEqualTo(4); + assertThat(object.fifthField()).isEqualTo(5); + } + + @Test + public void classWithExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4, \"fifth_field\": 5}"); + + // when + StaticClassWithExplicitFieldNames object = parser.parse(json, json.length, StaticClassWithExplicitFieldNames.class); + + // then + assertThat(object.getFirstField()).isEqualTo(1); + assertThat(object.getSecondField()).isEqualTo(2); + assertThat(object.getThirdField()).isEqualTo(3); + assertThat(object.getFourthField()).isEqualTo(4); + 
assertThat(object.getFifthField()).isEqualTo(5); + } + + @Test + public void recordWithImplicitAndExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"implicitField\": \"abc\", \"explicit_field\": \"def\"}"); + + // when + RecordWithImplicitAndExplicitFieldNames object = parser.parse(json, json.length, RecordWithImplicitAndExplicitFieldNames.class); + + // then + assertThat(object.implicitField()).isEqualTo("abc"); + assertThat(object.explicitField()).isEqualTo("def"); + } + + @ParameterizedTest + @ValueSource(classes = {StaticClassWithImplicitAndExplicitFieldNames.class, StaticClassWithImplicitFieldNames.class}) + public void classWithImplicitFieldNames(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"firstField\": \"abc\", \"second_field\": \"def\"}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Some of " + expectedType.getName() + "'s constructor arguments are not annotated with @JsonFieldName."); + } + + @Test + public void nonStaticInnerClassesAreUnsupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"abc\"}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, NonStaticInnerClass.class) + ); + + // then + assertThat(ex) + .hasMessage("Unsupported class: " + NonStaticInnerClass.class.getName() + ". 
Inner non-static classes are not supported."); + } + + @Test + public void fieldNamesWithEscapes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"\\\"abc\\\\\": 1}"); + + // when + RecordWithEscapedFieldName jsonValue = parser.parse(json, json.length, RecordWithEscapedFieldName.class); + + // then + assertThat(jsonValue.firstField()).isEqualTo(1); + } + + @Test + public void fieldExistsInJsonButDoesNotExistInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"field\": 2, \"second\": 3}"); + + // when + RecordWithIntegerField jsonValue = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(jsonValue.field()).isEqualTo(2); + } + + @Test + public void fieldDoesNotExistInJsonButExistsInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"second\": 3}"); + + // when + RecordWithIntegerField jsonValue = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(jsonValue.field()).isNull(); + } + + @Test + public void primitiveFieldDoesNotExistInJsonButExistsInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"second\": 3}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerField.class) + ); + + // then + assertThat(ex) + .hasCauseExactlyInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {NestedRecordWithStringField.class, NestedStaticClassWithStringField.class}) + public void objectWithEmptyObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": {}}"); + + // when + Object object = parser.parse(json, json.length, expectedType); + + // then + 
assertThat(object).isNotNull(); + assertThat(object).hasNoNullFieldsOrProperties(); + assertThat(object).extracting("nestedField").hasFieldOrPropertyWithValue("field", null); + } + + @Test + public void objectWithObjectFieldToRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": {\"field\": \"abc\"}}"); + + // when + NestedRecordWithStringField object = parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.nestedField()).isNotNull(); + assertThat(object.nestedField().field()).isEqualTo("abc"); + } + + @Test + public void mismatchedTypeAtRootWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"{}\""); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '{' but got: '\"'."); + } + + @Test + public void mismatchedTypeAtObjectFieldWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": true}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, NestedRecordWithStringField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '{' but got: 't'."); + } + + @Test + public void invalidButParsableJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, : 2}"); + + // when + RecordWithIntegerField object = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(object.field()).isEqualTo(1); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"invalid\", \"field\": 1}", value = "Expected ':' but got: ','."), + @MapEntry(stringKey = "{\"field\": 1, \"invalid\"}", value = "More than 
one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void fieldWithoutValue(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"invalid\" 2, \"field\": 1}", value = "Expected ':' but got: '2'."), + @MapEntry(stringKey = "{\"field\": 1, \"invalid\" 2}", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void missingColon(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(strings = { + "{\"invalid\": 2 \"field\": 1}", + "{\"field\": 1 \"invalid\" 2}", + }) + public void missingComma(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected ',' but got: '\"'."); + } + + @Test + public void fieldWithoutName() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{: 2, \"field\": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + 
assertThat(ex) + .hasMessage("Expected '\"' but got: ':'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\null", "1", "true", "false", "[]", "{}"}) + public void invalidTypeOfFieldName(String fieldName) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{" + fieldName + ": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '\"' but got: '" + fieldName.charAt(0) + "'."); + } + + @ParameterizedTest + @ValueSource(strings = {"{\"field\": 1", "{\"field\":", "{\"field\"", "{", "{\"ignore\": {\"field\": 1", "{\"field\": 1,",}) + public void unclosedObject(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex).hasMessage("Unclosed object. 
Missing '}' for starting '{'."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void arrayOfObjectsAtRoot(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWithIntegerField[] array = parser.parse(json, json.length, RecordWithIntegerField[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void arrayOfObjectsAtObjectField(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + ArrayOfRecordsWithIntegerField object = parser.parse(json, json.length, ArrayOfRecordsWithIntegerField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void listOfObjectsAtObjectField(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + ListOfRecordsWithIntegerField object = parser.parse(json, json.length, ListOfRecordsWithIntegerField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{},", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1},", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1}[]", value = "Unclosed object. 
Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1}{}", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!"), + @MapEntry(stringKey = "{\"field\": 1}1", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "null,", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void moreValuesThanOneObjectAtRoot(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, RecordWithIntegerField.class)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void classWithMultipleConstructors() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, \"field2\": 2}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, ClassWithMultipleConstructors.class) + ); + + // then + assertThat(ex) + .hasMessage("Class: " + ClassWithMultipleConstructors.class.getName() + " has more than one constructor."); + } + + @Test + public void recordWithMultipleConstructors() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, \"field2\": 2}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithMultipleConstructors.class) + ); + + // then + assertThat(ex) + .hasMessage("Class: " + RecordWithMultipleConstructors.class.getName() + " has more than one constructor."); + } + + @Test + public void missingObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + NestedRecordWithStringField object = 
parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object.nestedField()).isNull(); + } + + @Test + public void objectInstantiationFailure() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, ClassWithFailingConstructor.class) + ); + + // then + assertThat(ex) + .hasMessage("Failed to construct an instance of " + ClassWithFailingConstructor.class.getName()) + .hasCauseExactlyInstanceOf(InvocationTargetException.class); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, 3, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + @Test + public void genericClassesOtherThanListAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": {\"field\": 123}}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithGenericField.class) + ); + + // then + assertThat(ex) + .hasMessage("Parametrized types other than java.util.List are not supported."); + } + + @Test + public void listsWithoutElementTypeAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, 2, 3]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithListWithoutElementType.class) + ); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + private record RecordWithExplicitFieldNames(@JsonFieldName("ąćśńźż") long firstField, + @JsonFieldName("\u20A9\u0E3F") long secondField, + @JsonFieldName("αβγ") long thirdField, + @JsonFieldName("😀abc😀") long fourthField, + @JsonFieldName("fifth_field") long fifthField) { + } + + private static class StaticClassWithExplicitFieldNames { + + private final long firstField; + private final long secondField; + private final long thirdField; + private final long fourthField; + private final long fifthField; + + private StaticClassWithExplicitFieldNames(@JsonFieldName("ąćśńźż") long firstField, + @JsonFieldName("\u20A9\u0E3F") long secondField, + @JsonFieldName("αβγ") long thirdField, + @JsonFieldName("😀abc😀") long fourthField, + @JsonFieldName("fifth_field") long fifthField) { + this.firstField = firstField; + this.secondField = secondField; + this.thirdField = thirdField; + this.fourthField = fourthField; + this.fifthField = fifthField; + } + + public long getFirstField() { + return firstField; + } + + public long getSecondField() { + return secondField; + } + + public long 
getThirdField() { + return thirdField; + } + + public long getFourthField() { + return fourthField; + } + + public long getFifthField() { + return fifthField; + } + } + + private record RecordWithImplicitAndExplicitFieldNames(String implicitField, + @JsonFieldName("explicit_field") String explicitField) { + } + + private static class StaticClassWithImplicitAndExplicitFieldNames { + + private final String firstField; + private final String secondField; + + StaticClassWithImplicitAndExplicitFieldNames(String firstField, @JsonFieldName("second_field") String secondField) { + this.firstField = firstField; + this.secondField = secondField; + } + + String getFirstField() { + return firstField; + } + + String getSecondField() { + return secondField; + } + } + + private static class StaticClassWithImplicitFieldNames { + + private final String firstField; + private final String secondField; + + StaticClassWithImplicitFieldNames(String firstField, String secondField) { + this.firstField = firstField; + this.secondField = secondField; + } + + String getFirstField() { + return firstField; + } + + String getSecondField() { + return secondField; + } + } + + private record RecordWithEscapedFieldName(@JsonFieldName("\"abc\\") long firstField) { + } + + private record NestedRecordWithStringField(RecordWithStringField nestedField) { + + } + + private static class NestedStaticClassWithStringField { + + private final ClassWithStringField nestedField; + + NestedStaticClassWithStringField(@JsonFieldName("nestedField") ClassWithStringField nestedField) { + this.nestedField = nestedField; + } + + ClassWithStringField getNestedField() { + return nestedField; + } + } + + private record ArrayOfRecordsWithIntegerField(RecordWithIntegerField[] field) { + + } + + private record ListOfRecordsWithIntegerField(List field) { + + } + + private static class ClassWithMultipleConstructors { + + private final int field; + private final int field2; + + 
ClassWithMultipleConstructors(@JsonFieldName("field") int field) { + this.field = field; + this.field2 = 0; + } + + ClassWithMultipleConstructors(@JsonFieldName("field") int field, @JsonFieldName("field2") int field2) { + this.field = field; + this.field2 = field2; + } + } + + private record RecordWithMultipleConstructors(int field, int field2) { + + RecordWithMultipleConstructors(int field) { + this(field, 0); + } + } + + private static class ClassWithFailingConstructor { + + ClassWithFailingConstructor(@JsonFieldName("field") int field) { + throw new RuntimeException(); + } + } + + private class NonStaticInnerClass { + + private final String field; + + NonStaticInnerClass(@JsonFieldName("field") String field) { + this.field = field; + } + + String getField() { + return field; + } + } + + private record RecordWithGenericField(GenericRecord field) { + + } + + private record GenericRecord(T field) { + + } + + private record RecordWithListWithoutElementType(List field) { + + } + + private static class ClassWithoutExplicitConstructor { + + } +} diff --git a/src/test/java/org/simdjson/SimdJsonParserTest.java b/src/test/java/org/simdjson/SimdJsonParserTest.java deleted file mode 100644 index 3c16218..0000000 --- a/src/test/java/org/simdjson/SimdJsonParserTest.java +++ /dev/null @@ -1,323 +0,0 @@ -package org.simdjson; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import java.io.IOException; -import java.util.Iterator; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; -import static org.simdjson.TestUtils.loadTestFile; -import static org.simdjson.TestUtils.toUtf8; - -public class SimdJsonParserTest { - - @Test - public void testEmptyArray() { - // given - SimdJsonParser parser = new 
SimdJsonParser(); - byte[] json = toUtf8("[]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - while (it.hasNext()) { - fail("Unexpected value"); - it.next(); - } - } - - @Test - public void testArrayIterator() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[1, 2, 3]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - int[] expectedValues = new int[]{1, 2, 3}; - int counter = 0; - Iterator it = jsonValue.arrayIterator(); - while (it.hasNext()) { - JsonValue element = it.next(); - assertThat(element.isLong()).isTrue(); - assertThat(element.asLong()).isEqualTo(expectedValues[counter]); - counter++; - } - assertThat(counter).isEqualTo(expectedValues.length); - } - - @Test - public void testBooleanValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[true, false]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo(true); - assertThat(it.next()).isEqualTo(false); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testBooleanValuesAsRoot(boolean booleanVal) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(Boolean.toString(booleanVal)); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(booleanVal); - } - - @Test - public void testNullValue() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[null]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - 
assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - JsonValue element = it.next(); - assertThat(element.isNull()).isTrue(); - assertThat(it.hasNext()).isFalse(); - } - - @Test - public void testNullValueAsRoot() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("null"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isNull()).isTrue(); - } - - @Test - public void testStringValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo("abc"); - assertThat(it.next()).isEqualTo("ab\\c"); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(strings = {"abc", "ą"}) - public void testStringValuesAsRoot(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + jsonStr + "\""); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(jsonStr); - } - - @Test - public void testNumericValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[0, 1, -1, 1.1]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo(0); - assertThat(it.next()).isEqualTo(1); - assertThat(it.next()).isEqualTo(-1); - assertThat(it.next()).isEqualTo(1.1); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(strings = {"0", "1", "-1"}) - public void 
testLongValuesAsRoot(String longStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(longStr); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(Long.parseLong(longStr)); - } - - @ParameterizedTest - @ValueSource(strings = {"1.1", "-1.1", "1e1", "1E1", "-1e1", "-1E1", "1e-1", "1E-1", "1.1e1", "1.1E1"}) - public void testDoubleValuesAsRoot(String doubleStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(doubleStr); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(Double.parseDouble(doubleStr)); - } - - @ParameterizedTest - @ValueSource(strings = {"true,", "false,", "null,", "1,", "\"abc\",", "1.1,"}) - public void testInvalidPrimitivesAsRoot(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()) - .isEqualTo("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - } - - @ParameterizedTest - @ValueSource(strings = {"[n]", "{\"a\":n}"}) - public void testInvalidNull(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('n') + ". 
Expected 'null'."); - } - - @ParameterizedTest - @ValueSource(strings = {"[f]", "{\"a\":f}"}) - public void testInvalidFalse(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('f') + ". Expected 'false'."); - } - - @ParameterizedTest - @ValueSource(strings = {"[t]", "{\"a\":t}"}) - public void testInvalidTrue(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('t') + ". Expected 'true'."); - } - - @Test - public void testArraySize() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[1, 2, 3]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - assertThat(jsonValue.getSize()).isEqualTo(3); - } - - @Test - public void testLargeArraySize() { - // given - SimdJsonParser parser = new SimdJsonParser(); - int realArraySize = 0xFFFFFF + 1; - byte[] json = new byte[realArraySize * 2 - 1 + 2]; - json[0] = '['; - int i = 0; - while (i < realArraySize) { - json[i * 2 + 1] = (byte) '0'; - json[i * 2 + 2] = (byte) ','; - i++; - } - json[json.length - 1] = ']'; - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - assertThat(jsonValue.getSize()).isEqualTo(0xFFFFFF); - } - - @Test - public void issue26DeepBench() throws IOException { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = loadTestFile("/deep_bench.json"); - - // when 
- JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isObject()).isTrue(); - } - - @ParameterizedTest - @ValueSource(strings = {"/wide_bench.json", "/deep_bench.json"}) - public void issue26(String file) throws IOException { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = loadTestFile(file); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isObject()).isTrue(); - } -} diff --git a/src/test/java/org/simdjson/StringParsingTest.java b/src/test/java/org/simdjson/StringParsingTest.java index a61572e..580a5a7 100644 --- a/src/test/java/org/simdjson/StringParsingTest.java +++ b/src/test/java/org/simdjson/StringParsingTest.java @@ -3,40 +3,70 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.testutils.RandomStringSource; +import org.simdjson.testutils.StringTestData; +import java.io.IOException; import java.util.Iterator; import java.util.List; -import static java.lang.Character.MAX_CODE_POINT; -import static java.lang.Character.isBmpCodePoint; -import static java.lang.Character.lowSurrogate; -import static java.util.stream.IntStream.rangeClosed; import static org.apache.commons.text.StringEscapeUtils.unescapeJava; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; +import static org.junit.jupiter.api.Assertions.fail; +import static org.simdjson.TestUtils.loadTestFile; +import static org.simdjson.TestUtils.padWithSpaces; import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; public class StringParsingTest { + @ParameterizedTest + @RandomStringSource + public void stringAtRoot(String jsonStr, String expected) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\",", "\"abc\"def"}) + public void moreValuesThanOneStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + @Test public void usableUnicodeCharacters() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0, MAX_CODE_POINT) - .filter(Character::isDefined) - .filter(codePoint -> !isReservedCodePoint(codePoint)) - .mapToObj(StringParsingTest::toUnicodeEscape) - .toList(); + List characters = StringTestData.usableEscapedUnicodeCharacters(); - for (String input : unicodeCharacters) { - byte[] json = toUtf8("\"" + input + "\""); + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonValue value = parser.parse(json, json.length); + // when + JsonValue value = parser.parse(json, json.length); - // then - assertThat(value).isEqualTo(unescapeJava(input)); + // then + assertThat(value).isEqualTo(unescapeJava(character)); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @@ -44,33 +74,37 @@ public void usableUnicodeCharacters() { public void unicodeCharactersReservedForLowSurrogate() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0xDC00, 0xDFFF) - .mapToObj(StringParsingTest::toUnicodeEscape) - .toList(); + List unicodeCharacters = StringTestData.escapedLowSurrogates(); - for (String input : unicodeCharacters) { - 
byte[] json = toUtf8("\"" + input + "\""); + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. The range U+DC00–U+DFFF is reserved for low surrogate."); + // then + assertThat(ex) + .hasMessage("Invalid code point. The range U+DC00–U+DFFF is reserved for low surrogate."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @ParameterizedTest - @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00"}) - public void invalidLowSurrogateEscape(String input) { + @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00", "\\uD800"}) + public void invalidLowSurrogateEscape(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + input + "\""); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Low surrogate should start with '\\u'"); + assertThat(ex) + .hasMessage("Low surrogate should start with '\\u'"); } @ParameterizedTest @@ -84,55 +118,60 @@ public void missingLowSurrogate(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + assertThat(ex) + .hasMessage("Invalid code point. 
Low surrogate should be in the range U+DC00–U+DFFF."); } @Test public void invalidLowSurrogateRange() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0x0000, 0xFFFF) - .filter(lowSurrogate -> lowSurrogate < 0xDC00 || lowSurrogate > 0xDFFF) - .mapToObj(lowSurrogate -> String.format("\\uD800\\u%04X", lowSurrogate)) - .toList(); + List unicodeCharacters = StringTestData.escapedUnicodeCharactersWithInvalidLowSurrogate(); - for (String input : unicodeCharacters) { - byte[] json = toUtf8("\"" + input + "\""); + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + // then + assertThat(ex) + .hasMessage("Invalid code point. 
Low surrogate should be in the range U+DC00–U+DFFF."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @ParameterizedTest @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) - public void invalidUnicode(String input) { + public void invalidUnicode(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + input + "\""); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid unicode escape sequence."); + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); } @ParameterizedTest @ValueSource(strings = {"\\g", "\\ą"}) - public void invalidEscape(String jsonStr) { + public void invalidEscape(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[\"" + jsonStr + "\"]"); + byte[] json = toUtf8("[\"" + invalidCharacter + "\"]"); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).startsWith("Escaped unexpected character: "); + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); } @Test @@ -152,16 +191,86 @@ public void longString() { assertThat(it.hasNext()).isFalse(); } - private static String toUnicodeEscape(int codePoint) { - if (isBmpCodePoint(codePoint)) { - return String.format("\\u%04X", codePoint); - } else { - return String.format("\\u%04X\\u%04X", - (int) Character.highSurrogate(codePoint), (int) lowSurrogate(codePoint)); + @ParameterizedTest + @ValueSource(strings = {"/wide_bench.json", "/deep_bench.json"}) + public void issue26(String file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = loadTestFile(file); + + // when + JsonValue jsonValue = parser.parse(json, 
json.length); + + // then + assertThat(jsonValue.isObject()).isTrue(); + } + + @Test + public void unescapedControlCharacterAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } - private static boolean isReservedCodePoint(int codePoint) { - return codePoint >= 0xD800 && codePoint <= 0xDFFF; + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsString(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. 
A string is opened, but never closed."); + } + + @Test + public void arrayOfStrings() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo("abc"); + assertThat(it.next()).isEqualTo("ab\\c"); + assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanStringLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("\"aaaaa\"")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 6)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); } } diff --git a/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java b/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java new file mode 100644 index 0000000..52ebcb0 --- /dev/null +++ b/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java @@ -0,0 +1,1357 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithCharacterArrayField; +import org.simdjson.schemas.RecordWithCharacterField; +import org.simdjson.schemas.RecordWithCharacterListField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterArrayField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithStringArrayField; +import 
org.simdjson.schemas.RecordWithStringField; +import org.simdjson.schemas.RecordWithStringListField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.RandomStringSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; +import org.simdjson.testutils.StringTestData; + +import java.util.List; + +import static org.apache.commons.text.StringEscapeUtils.unescapeJava; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.fail; +import static org.simdjson.TestUtils.padWithSpaces; +import static org.simdjson.TestUtils.toUtf8; + +public class StringSchemaBasedParsingTest { + + @Test + public void emptyStringAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"\""); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isEqualTo(""); + } + + @ParameterizedTest + @RandomStringSource + public void stringAtRoot(String jsonStr, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected either string or 'null'."); + } + + @Test + public void nullAtRootWhenStringIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isNull(); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = char.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = Character.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForStringAsRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"abc\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\",", "\"abc\"def"}) + public void moreValuesThanOneStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @Test + public void emptyStringAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"\"}"); + + // when + 
RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isEqualTo(""); + } + + @ParameterizedTest + @RandomStringSource + public void stringAtObjectField(String jsonStr, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenStringIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanStringAtObjectField(String value) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + value + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithStringField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithPrimitiveCharacterField.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = RecordWithCharacterField.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForStringAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"abc\"}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void usableUnicodeCharactersAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedUnicodeCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + String value = parser.parse(json, json.length, String.class); + + // then + assertThat(value).isEqualTo(unescapeJava(character)); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unicodeCharactersReservedForLowSurrogateAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List codePoints = StringTestData.escapedLowSurrogates(); + + for (String codePoint : codePoints) { + try { + byte[] json = toUtf8("\"" + codePoint + "\""); + + // when (typed overload, so the schema-based String path is the one under test) + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point.
The range U+DC00–U+DFFF is reserved for low surrogate."); + } catch (Throwable e) { + fail("Failed for code point: " + codePoint, e); + } + } + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void characterAtRoot(String jsonStr, Character expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character) + .isEqualTo(expected); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void primitiveCharAtRoot(String jsonStr, char expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character) + .isEqualTo(expected); + } + + @Test + public void nullAtRootWhenCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isNull(); + } + + @Test + public void nullAtRootWhenPrimitiveCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected either string or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanPrimitiveCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected string."); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void characterAtObjectField(String jsonStr, Character expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()) + .isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void primitiveCharAtObjectField(String jsonStr, char expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // 
when + RecordWithPrimitiveCharacterField object = parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class); + + // then + assertThat(object.field()) + .isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenPrimitiveCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanCharacterAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanPrimitiveCharacterAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. 
Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"a\"", "\"a"}) + public void missingQuotationMarkForCharacter(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"a\"", "\"a"}) + public void missingQuotationMarkForPrimitiveCharacter(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @Test + public void missingQuotationMarksForCharacterAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("a"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected either string or 'null'."); + } + + @Test + public void missingQuotationMarksForPrimitiveCharacterAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("a"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected string."); + } + + @Test + public void missingQuotationMarksForCharacterAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": a}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @Test + public void missingQuotationMarksForPrimitiveCharacterAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": a}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"ab\"", "\"\\u0024\\u0023\""}) + public void stringLongerThanOneCharacterWhenCharacterIsExpected(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. Expected a single-character string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"ab\"", "\"\\u0024\\u0023\""}) + public void stringLongerThanOneCharacterWhenPrimitiveCharacterIsExpected(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. 
Expected a single-character string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\\"", "\\\\", "\\/", "\\b", "\\f", "\\n", "\\r", "\\t"}) + public void twoCharacterEscapeSequenceAsPrimitiveCharacter(String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } + + @ParameterizedTest + @ValueSource(strings = {"\\\"", "\\\\", "\\/", "\\b", "\\f", "\\n", "\\r", "\\t"}) + public void twoCharacterEscapeSequenceAsCharacter(String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } + + @ParameterizedTest + @ValueSource(classes = {Character.class, char.class}) + public void restrictedEscapedSingleCodeUnit(Class expectedClass) { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.reservedEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedClass)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. 
Should be within the range U+0000–U+D777 or U+E000–U+FFFF."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableEscapedSingleCodeUnitAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableEscapedSingleCodeUnitAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableSingleCodeUnitAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(expected.charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableSingleCodeUnitAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableSingleCodeUnitCharacters(); + + for (String expected : 
characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(expected.charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableTwoCodeUnitsAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableTwoCodeUnitsCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. Expected a single 16-bit code unit character."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableTwoCodeUnitsAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableTwoCodeUnitsCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. 
Expected a single 16-bit code unit character."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\"a\",", "\"a\"b"}) + public void moreValuesThanOnePrimitiveCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"\"a\",", "\"a\"b"}) + public void moreValuesThanOneCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00", "\\uD800"}) + public void invalidLowSurrogateEscape(String input) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + input + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Low surrogate should start with '\\u'"); + } + + @ParameterizedTest + @ValueSource(strings = {"\\uD800\\u"}) + public void missingLowSurrogate(String input) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + input + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> 
parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + } + + @Test + public void invalidLowSurrogateRange() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List unicodeCharacters = StringTestData.escapedUnicodeCharactersWithInvalidLowSurrogate(); + + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsString(String invalidCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsPrimitiveCharacter(String invalidCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsCharacter(String invalidCharacter) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsString(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex).hasMessageStartingWith("Escaped unexpected character: "); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsPrimitiveCharacter(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsCharacter(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when (boxed Character target; the char case is covered by invalidEscapeAsPrimitiveCharacter) + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); + } + + @Test + public void unescapedControlCharacterAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character +
"\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unescapedControlCharacterAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unescapedControlCharacterAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. 
Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsString(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsPrimitiveCharacter(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsCharacter(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. 
A string is opened, but never closed."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void arrayOfStringsAtRoot(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + String[] array = parser.parse(json, json.length, String[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = true) + public void arrayOfStringsAndNullsAtRoot(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + String[] array = parser.parse(json, json.length, String[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfStringsMixedWithOtherTypesAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\", 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 17. 
Expected either string or 'null'."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void objectWithArrayOfStrings(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringArrayField object = parser.parse(json, json.length, RecordWithStringArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void objectWithListOfStrings(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringListField object = parser.parse(json, json.length, RecordWithStringListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = true) + public void objectWithListOfStringsAndNulls(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringListField object = parser.parse(json, json.length, RecordWithStringListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void arrayOfCharactersAtRoot(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Character[] array = parser.parse(json, json.length, Character[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = char[].class, nulls = false) + public void 
arrayOfPrimitiveCharactersAtRoot(String jsonStr, char[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + char[] array = parser.parse(json, json.length, char[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = true) + public void arrayOfCharsAndNullsAtRoot(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Character[] array = parser.parse(json, json.length, Character[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfPrimitiveCharactersAndNullsAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"a\", \"b\", null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 11. Expected string."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = char[].class, value = "Invalid value starting at 11. Expected string."), + @MapEntry(classKey = Character[].class, value = "Invalid value starting at 11. 
Expected either string or 'null'.") + }) + public void arrayOfCharactersMixedWithOtherTypesAtRoot(Class<?> expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"a\", \"b\", 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void objectWithArrayOfCharacters(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterArrayField object = parser.parse(json, json.length, RecordWithCharacterArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = char[].class, nulls = false) + public void objectWithArrayOfPrimitiveCharacters(String jsonStr, char[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithPrimitiveCharacterArrayField object = parser.parse(json, json.length, RecordWithPrimitiveCharacterArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void objectWithListOfCharacters(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterListField object = parser.parse(json, json.length, RecordWithCharacterListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = 
Character[].class, nulls = true) + public void objectWithListOfCharactersAndNulls(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterListField object = parser.parse(json, json.length, RecordWithCharacterListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = String[][].class, value = "Expected '[' but got: '\"'.") + }) + public void mismatchedTypeForArrayOfStringsAtRoot(Class<?> expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = String[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String[][].class, value = "Expected '[' but got: '{'.") + }) + public void mismatchedTypeForArrayOfStringsAtObjectField(Class<?> expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [\"abc\", \"ab\\\\c\"]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void missingStringField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingCharacterField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingPrimitiveCharacterField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex.getCause()).isInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {char.class, Character.class, String.class}) + public void emptyJson(Class<?> expectedType) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {Character.class, String.class}) + public void passedLengthSmallerThanNullLength(Class<?> expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("null")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @Test + public void passedLengthSmallerThanStringLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(padWithSpaces("\"aaaaa\"")); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, String.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. 
A string is opened, but never closed."); + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java b/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java new file mode 100644 index 0000000..1b5d626 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithIntegerField { + + private final Integer field; + + public ClassWithIntegerField(@JsonFieldName("field") Integer field) { + this.field = field; + } + + public Integer getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java new file mode 100644 index 0000000..16e0bc0 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveBooleanField { + + private final boolean field; + + public ClassWithPrimitiveBooleanField(@JsonFieldName("field") boolean field) { + this.field = field; + } + + public boolean getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java new file mode 100644 index 0000000..6d6c5e4 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveByteField { + + private final byte field; + + public ClassWithPrimitiveByteField(@JsonFieldName("field") byte field) { + this.field = field; + } + + public byte getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java 
b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java new file mode 100644 index 0000000..369c5dc --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveCharacterField { + + private final char field; + + public ClassWithPrimitiveCharacterField(@JsonFieldName("field") char field) { + this.field = field; + } + + public char getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java new file mode 100644 index 0000000..36e7695 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveDoubleField { + + private final double field; + + public ClassWithPrimitiveDoubleField(@JsonFieldName("field") double field) { + this.field = field; + } + + public double getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java new file mode 100644 index 0000000..fb2bb99 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveFloatField { + + private final float field; + + public ClassWithPrimitiveFloatField(@JsonFieldName("field") float field) { + this.field = field; + } + + public float getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java new file mode 100644 index 0000000..793dc77 
--- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveIntegerField { + + private final int field; + + public ClassWithPrimitiveIntegerField(@JsonFieldName("field") int field) { + this.field = field; + } + + public int getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java new file mode 100644 index 0000000..17a9d2e --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveLongField { + + private final long field; + + public ClassWithPrimitiveLongField(@JsonFieldName("field") long field) { + this.field = field; + } + + public long getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java new file mode 100644 index 0000000..35ac7fe --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveShortField { + + private final short field; + + public ClassWithPrimitiveShortField(@JsonFieldName("field") short field) { + this.field = field; + } + + public short getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithStringField.java b/src/test/java/org/simdjson/schemas/ClassWithStringField.java new file mode 100644 index 0000000..2c8f3ee --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithStringField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import 
org.simdjson.annotations.JsonFieldName; + +public class ClassWithStringField { + + private final String field; + + public ClassWithStringField(@JsonFieldName("field") String field) { + this.field = field; + } + + public String getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java new file mode 100644 index 0000000..351bd23 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithBooleanArrayField(Boolean[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java new file mode 100644 index 0000000..5f8f3cf --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithBooleanField(Boolean field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java new file mode 100644 index 0000000..3f3517c --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithBooleanListField(List<Boolean> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java new file mode 100644 index 0000000..cf3eecb --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithByteArrayField(Byte[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteField.java b/src/test/java/org/simdjson/schemas/RecordWithByteField.java new file mode 100644 index 
0000000..7297453 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithByteField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithByteField(Byte field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteListField.java b/src/test/java/org/simdjson/schemas/RecordWithByteListField.java new file mode 100644 index 0000000..d15732a --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithByteListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithByteListField(List<Byte> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java new file mode 100644 index 0000000..532cf55 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithCharacterArrayField(Character[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java new file mode 100644 index 0000000..bd1d21a --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithCharacterField(Character field) { + +} \ No newline at end of file diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java new file mode 100644 index 0000000..7bff7b1 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithCharacterListField(List<Character> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java 
b/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java new
file mode 100644 index 0000000..d65c4d2 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithDoubleArrayField(Double[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java b/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java new file mode 100644 index 0000000..1f3aeb3 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithDoubleField(Double field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java b/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java new file mode 100644 index 0000000..cdcdc03 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithDoubleListField(List<Double> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java new file mode 100644 index 0000000..611483b --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithFloatArrayField(Float[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatField.java new file mode 100644 index 0000000..aeb95d7 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithFloatField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithFloatField(Float field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java new file mode 100644 index 0000000..ce75e91 --- /dev/null +++ 
b/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithFloatListField(List<Float> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java new file mode 100644 index 0000000..d442af2 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithIntegerArrayField(Integer[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java new file mode 100644 index 0000000..5aafd3e --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithIntegerField(Integer field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java new file mode 100644 index 0000000..6c34fd2 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithIntegerListField(List<Integer> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java new file mode 100644 index 0000000..2829062 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithLongArrayField(Long[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongField.java b/src/test/java/org/simdjson/schemas/RecordWithLongField.java new file mode 100644 index 0000000..698db5c --- /dev/null +++ 
b/src/test/java/org/simdjson/schemas/RecordWithLongField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithLongField(Long field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongListField.java b/src/test/java/org/simdjson/schemas/RecordWithLongListField.java new file mode 100644 index 0000000..a0ed295 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithLongListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithLongListField(List<Long> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java new file mode 100644 index 0000000..d3c0663 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveBooleanArrayField(boolean[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java new file mode 100644 index 0000000..c67eae3 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveBooleanField(boolean field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java new file mode 100644 index 0000000..3127a2f --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveByteArrayField(byte[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java new file mode 
100644 index 0000000..64e3534 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveByteField(byte field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java new file mode 100644 index 0000000..003d4d4 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveCharacterArrayField(char[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java new file mode 100644 index 0000000..9ff7386 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveCharacterField(char field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java new file mode 100644 index 0000000..29f6f2f --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveDoubleArrayField(double[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java new file mode 100644 index 0000000..6325017 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveDoubleField(double field) { + +} diff --git 
a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java new file mode 100644 index 0000000..6dbc9a5 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveFloatArrayField(float[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java new file mode 100644 index 0000000..87801be --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveFloatField(float field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java new file mode 100644 index 0000000..412b594 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveIntegerArrayField(int[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java new file mode 100644 index 0000000..9d7b47d --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveIntegerField(int field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java new file mode 100644 index 0000000..d0afa42 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java @@ -0,0 +1,4 @@ +package 
org.simdjson.schemas; + +public record RecordWithPrimitiveLongArrayField(long[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java new file mode 100644 index 0000000..dfb5608 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveLongField(long field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java new file mode 100644 index 0000000..95ac8fc --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveShortArrayField(short[] field) { +} + diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java new file mode 100644 index 0000000..129c7ca --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithPrimitiveShortField(short field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java new file mode 100644 index 0000000..e819871 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithShortArrayField(Short[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortField.java b/src/test/java/org/simdjson/schemas/RecordWithShortField.java new file mode 100644 index 0000000..046447e --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithShortField.java @@ -0,0 +1,5 @@ +package 
org.simdjson.schemas; + +public record RecordWithShortField(Short field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortListField.java b/src/test/java/org/simdjson/schemas/RecordWithShortListField.java new file mode 100644 index 0000000..f4c5d20 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithShortListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithShortListField(List field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java new file mode 100644 index 0000000..08ce42f --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithStringArrayField(String[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithStringField.java b/src/test/java/org/simdjson/schemas/RecordWithStringField.java new file mode 100644 index 0000000..099d4e6 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithStringField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithStringField(String field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithStringListField.java b/src/test/java/org/simdjson/schemas/RecordWithStringListField.java new file mode 100644 index 0000000..5c4cba9 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithStringListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithStringListField(List field) { +} diff --git a/src/test/java/org/simdjson/testutils/CartesianTestCsv.java b/src/test/java/org/simdjson/testutils/CartesianTestCsv.java new file mode 100644 index 0000000..d4c601e --- /dev/null +++ b/src/test/java/org/simdjson/testutils/CartesianTestCsv.java @@ -0,0 +1,16 @@ +package org.simdjson.testutils; + +import 
org.junitpioneer.jupiter.cartesian.CartesianArgumentsSource; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Target(ElementType.PARAMETER) +@Retention(RetentionPolicy.RUNTIME) +@CartesianArgumentsSource(CartesianTestCsvArgumentsProvider.class) +public @interface CartesianTestCsv { + + String[] value() default {}; +} diff --git a/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java b/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java new file mode 100644 index 0000000..35eaea0 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java @@ -0,0 +1,25 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junitpioneer.jupiter.cartesian.CartesianParameterArgumentsProvider; + +import java.lang.reflect.Parameter; +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Stream; + +class CartesianTestCsvArgumentsProvider implements CartesianParameterArgumentsProvider { + + @Override + public Stream provideArguments(ExtensionContext context, Parameter parameter) { + CartesianTestCsv source = Objects.requireNonNull(parameter.getAnnotation(CartesianTestCsv.class)); + return Arrays.stream(source.value()) + .map(row -> row.split(",")) + .peek(row -> { + for (int i = 0; i < row.length; i++) { + row[i] = row[i].trim(); + } + }) + .map(CartesianTestCsvRow::new); + } +} diff --git a/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java b/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java new file mode 100644 index 0000000..4785cfb --- /dev/null +++ b/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java @@ -0,0 +1,39 @@ +package org.simdjson.testutils; + +import java.util.Arrays; + +public class CartesianTestCsvRow { + + private final String[] cells; + + 
CartesianTestCsvRow(String[] cells) { + this.cells = cells; + } + + public String getValueAsString(int column) { + return cells[column]; + } + + public double getValueAsDouble(int column) { + return Double.parseDouble(cells[column]); + } + + public float getValueAsFloat(int column) { + return Float.parseFloat(cells[column]); + } + + public Object getValue(int column, Class expectedTye) { + if (expectedTye == Float.class || expectedTye == float.class) { + return getValueAsFloat(column); + } + if (expectedTye == Double.class || expectedTye == double.class) { + return getValueAsDouble(column); + } + throw new UnsupportedOperationException("Unsupported type: " + expectedTye.getName()); + } + + @Override + public String toString() { + return Arrays.toString(cells); + } +} diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java new file mode 100644 index 0000000..fefaf36 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java @@ -0,0 +1,82 @@ +package org.simdjson.testutils; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Iterator; + +public class FloatingPointNumberTestFile { + + private final File file; + + FloatingPointNumberTestFile(File file) { + this.file = file; + } + + public FloatingPointNumberTestCasesIterator iterator() throws IOException { + return new FloatingPointNumberTestCasesIterator(file); + } + + @Override + public String toString() { + return file.toString(); + } + + public record FloatingPointNumberTestCase(int line, String input, float expectedFloat, double expectedDouble) { + + } + + public static class FloatingPointNumberTestCasesIterator implements Iterator, AutoCloseable { + + private final BufferedReader br; + + private int nextLineNo = 0; + private String nextLine; + + private FloatingPointNumberTestCasesIterator(File file) throws 
IOException { + br = new BufferedReader(new FileReader(file)); + moveToNextLine(); + } + + @Override + public boolean hasNext() { + return nextLine != null; + } + + @Override + public FloatingPointNumberTestCase next() { + String[] cells = nextLine.split(" "); + float expectedFloat = Float.intBitsToFloat(Integer.decode("0x" + cells[1])); + double expectedDouble = Double.longBitsToDouble(Long.decode("0x" + cells[2])); + String input = readInputNumber(cells[3]); + try { + moveToNextLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new FloatingPointNumberTestCase(nextLineNo, input, expectedFloat, expectedDouble); + } + + @Override + public void close() throws IOException { + br.close(); + } + + private void moveToNextLine() throws IOException { + nextLine = br.readLine(); + nextLineNo++; + } + + private static String readInputNumber(String input) { + boolean isDouble = input.indexOf('e') >= 0 || input.indexOf('E') >= 0 || input.indexOf('.') >= 0; + if (isDouble) { + if (input.startsWith(".")) { + input = "0" + input; + } + return input.replaceFirst("\\.[eE]", ".0e"); + } + return input + ".0"; + } + } +} diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java new file mode 100644 index 0000000..bb9f152 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java @@ -0,0 +1,34 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.io.File; +import java.nio.file.Path; +import java.util.stream.Stream; + +class FloatingPointNumberTestFilesProvider implements ArgumentsProvider, AnnotationConsumer { + + @Override + public Stream provideArguments(ExtensionContext 
context) { + return listTestFiles() + .map(FloatingPointNumberTestFile::new) + .map(Arguments::of); + } + + @Override + public void accept(FloatingPointNumberTestFilesSource annotation) { + } + + private static Stream listTestFiles() { + String testDataDir = System.getProperty("org.simdjson.testdata.dir", System.getProperty("user.dir") + "/testdata"); + File[] testFiles = Path.of(testDataDir, "parse-number-fxx-test-data", "data").toFile().listFiles(); + if (testFiles == null) { + return Stream.empty(); + } + return Stream.of(testFiles) + .filter(File::isFile); + } +} diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java new file mode 100644 index 0000000..3e2cd3b --- /dev/null +++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java @@ -0,0 +1,26 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Provides files with floating point number test cases. + *

<p> + * The default location of the files is the directory /testdata within the project directory. + * It can be customized using the system property 'org.simdjson.testdata.dir'. + * <p>

+ * The files are expected to be formatted as described at: + * https://github.com/nigeltao/parse-number-fxx-test-data + */ +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(FloatingPointNumberTestFilesProvider.class) +public @interface FloatingPointNumberTestFilesSource { + +} diff --git a/src/test/java/org/simdjson/JsonValueAssert.java b/src/test/java/org/simdjson/testutils/JsonValueAssert.java similarity index 81% rename from src/test/java/org/simdjson/JsonValueAssert.java rename to src/test/java/org/simdjson/testutils/JsonValueAssert.java index 6c8bf66..d3350c0 100644 --- a/src/test/java/org/simdjson/JsonValueAssert.java +++ b/src/test/java/org/simdjson/testutils/JsonValueAssert.java @@ -1,19 +1,16 @@ -package org.simdjson; +package org.simdjson.testutils; import org.assertj.core.api.AbstractAssert; import org.assertj.core.api.Assertions; +import org.simdjson.JsonValue; -class JsonValueAssert extends AbstractAssert { +public class JsonValueAssert extends AbstractAssert { JsonValueAssert(JsonValue actual) { super(actual, JsonValueAssert.class); } - static JsonValueAssert assertThat(JsonValue actual) { - return new JsonValueAssert(actual); - } - - JsonValueAssert isEqualTo(long expected) { + public JsonValueAssert isEqualTo(long expected) { Assertions.assertThat(actual.isLong()) .withFailMessage("Expecting value to be long but was " + getActualType()) .isTrue(); @@ -21,7 +18,7 @@ JsonValueAssert isEqualTo(long expected) { return this; } - JsonValueAssert isEqualTo(Double expected) { + public JsonValueAssert isEqualTo(Double expected) { Assertions.assertThat(actual.isDouble()) .withFailMessage("Expecting value to be double but was " + getActualType()) .isTrue(); @@ -29,7 +26,7 @@ JsonValueAssert isEqualTo(Double expected) { return this; } - JsonValueAssert isEqualTo(String expected) { + public JsonValueAssert isEqualTo(String expected) { Assertions.assertThat(actual.isString()) .withFailMessage("Expecting value to be 
string but was " + getActualType()) .isTrue(); @@ -37,7 +34,7 @@ JsonValueAssert isEqualTo(String expected) { return this; } - JsonValueAssert isEqualTo(boolean expected) { + public JsonValueAssert isEqualTo(boolean expected) { Assertions.assertThat(actual.isBoolean()) .withFailMessage("Expecting value to be boolean but was " + getActualType()) .isTrue(); diff --git a/src/test/java/org/simdjson/testutils/MapEntry.java b/src/test/java/org/simdjson/testutils/MapEntry.java new file mode 100644 index 0000000..e821958 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/MapEntry.java @@ -0,0 +1,10 @@ +package org.simdjson.testutils; + +public @interface MapEntry { + + String[] stringKey() default {}; + + Class[] classKey() default {}; + + String value(); +} diff --git a/src/test/java/org/simdjson/testutils/MapSource.java b/src/test/java/org/simdjson/testutils/MapSource.java new file mode 100644 index 0000000..4271b74 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/MapSource.java @@ -0,0 +1,18 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(MapSourceProvider.class) +public @interface MapSource { + + MapEntry[] value(); +} diff --git a/src/test/java/org/simdjson/testutils/MapSourceProvider.java b/src/test/java/org/simdjson/testutils/MapSourceProvider.java new file mode 100644 index 0000000..1e38928 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/MapSourceProvider.java @@ -0,0 +1,39 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import 
org.junit.jupiter.params.support.AnnotationConsumer; + +import java.util.Arrays; +import java.util.stream.Stream; + +class MapSourceProvider implements ArgumentsProvider, AnnotationConsumer { + + private MapEntry[] entries; + + @Override + public void accept(MapSource mapSource) { + entries = mapSource.value(); + } + + @Override + public Stream provideArguments(ExtensionContext context) { + return Arrays.stream(entries) + .map(entry -> { + Object[] key = null; + if (entry.stringKey().length != 0) { + key = entry.stringKey(); + } else if (entry.classKey().length != 0) { + key = entry.classKey(); + } + if (key == null) { + throw new IllegalArgumentException("Missing key."); + } + if (key.length > 1) { + throw new IllegalArgumentException("Expected one key, got " + key.length); + } + return Arguments.of(key[0], entry.value()); + }); + } +} diff --git a/src/test/java/org/simdjson/testutils/NumberTestData.java b/src/test/java/org/simdjson/testutils/NumberTestData.java new file mode 100644 index 0000000..215c7f2 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/NumberTestData.java @@ -0,0 +1,42 @@ +package org.simdjson.testutils; + +import java.util.Random; + +class NumberTestData { + + private static final Random RANDOM = new Random(); + + static byte randomByte() { + return (byte) RANDOM.nextInt(); + } + + static short randomShort() { + return (short) RANDOM.nextInt(); + } + + static int randomInt() { + return RANDOM.nextInt(); + } + + static long randomLong() { + return RANDOM.nextLong(); + } + + static double randomDouble() { + while (true) { + double randomVal = Double.longBitsToDouble(RANDOM.nextLong()); + if (randomVal < Double.POSITIVE_INFINITY && randomVal > Double.NEGATIVE_INFINITY) { + return randomVal; + } + } + } + + static float randomFloat() { + while (true) { + float randomVal = Float.intBitsToFloat(RANDOM.nextInt()); + if (randomVal < Float.POSITIVE_INFINITY && randomVal > Float.NEGATIVE_INFINITY) { + return randomVal; + } + } + } +} diff --git 
a/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java b/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java new file mode 100644 index 0000000..d39dc1c --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java @@ -0,0 +1,125 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Parameter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Stream; + +class RandomIntegralNumberProvider implements ArgumentsProvider, AnnotationConsumer { + + private static final int SEQUENCE_SIZE = 10; + + private Class[] classes; + private boolean includeMinMax; + + @Override + public Stream provideArguments(ExtensionContext context) { + return Arrays.stream(classes) + .flatMap(expectedClass -> { + List numbers = generate(expectedClass); + + if (!numbers.isEmpty()) { + return numbers.stream() + .map(num -> createArguments(context, expectedClass, String.valueOf(num), num)); + } + + Constructor constructor = resolveConstructor(expectedClass); + Parameter[] parameters = constructor.getParameters(); + Parameter parameter = parameters[0]; + Class parameterType = parameter.getType(); + numbers = generate(parameterType); + + if (!numbers.isEmpty()) { + return numbers.stream() + .map(num -> { + Object expected = createInstance(constructor, num); + String json = "{\"" + parameter.getName() + "\": " + num + "}"; + return createArguments(context, expectedClass, json, expected); + }); + } + + throw new IllegalArgumentException("Unsupported 
class: " + expectedClass); + }); + } + + @Override + public void accept(RandomIntegralNumberSource numbersSource) { + classes = numbersSource.classes(); + includeMinMax = numbersSource.includeMinMax(); + } + + private Constructor resolveConstructor(Class expectedClass) { + Constructor[] constructors = expectedClass.getDeclaredConstructors(); + if (constructors.length == 1) { + Constructor constructor = constructors[0]; + Parameter[] parameters = constructor.getParameters(); + if (parameters.length == 1) { + return constructor; + } + } + throw new IllegalArgumentException("Unsupported class: " + expectedClass); + } + + private List generate(Class expectedClass) { + if (expectedClass == Byte.class || expectedClass == byte.class) { + return generateNumbers(NumberTestData::randomByte, Byte.MIN_VALUE, Byte.MAX_VALUE); + } + if (expectedClass == Short.class || expectedClass == short.class) { + return generateNumbers(NumberTestData::randomShort, Short.MIN_VALUE, Short.MAX_VALUE); + } + if (expectedClass == Integer.class || expectedClass == int.class) { + return generateNumbers(NumberTestData::randomInt, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + if (expectedClass == Long.class || expectedClass == long.class) { + return generateNumbers(NumberTestData::randomLong, Long.MIN_VALUE, Long.MAX_VALUE); + } + return Collections.emptyList(); + } + + private List generateNumbers(Supplier generator, T min, T max) { + List numbers = new ArrayList<>(); + if (includeMinMax) { + numbers.add(min); + numbers.add(max); + } + int randomSequenceLen = SEQUENCE_SIZE - numbers.size(); + for (int i = 0; i < randomSequenceLen; i++) { + numbers.add(generator.get()); + } + return numbers; + } + + private static Object createInstance(Constructor constructor, Object arg) { + try { + return constructor.newInstance(arg); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + private static Arguments 
createArguments(ExtensionContext context, Class schema, String json, Object expected) { + Class[] parameterTypes = context.getRequiredTestMethod().getParameterTypes(); + Object[] args = new Object[parameterTypes.length]; + for (int i = 0; i < args.length; i++) { + if (parameterTypes[i] == Class.class) { + args[i] = Named.named(schema.getName(), schema); + } else if (parameterTypes[i] == String.class) { + args[i] = json; + } else { + args[i] = expected; + } + } + return () -> args; + } +} diff --git a/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java b/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java new file mode 100644 index 0000000..d2938f5 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java @@ -0,0 +1,23 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(RandomIntegralNumberProvider.class) +public @interface RandomIntegralNumberSource { + + Class[] classes(); + + /** + * If set to true generated test arguments will include the min and max values for a given numeric type. 
+ */ + boolean includeMinMax(); +} diff --git a/src/test/java/org/simdjson/testutils/RandomStringProvider.java b/src/test/java/org/simdjson/testutils/RandomStringProvider.java new file mode 100644 index 0000000..30d9840 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomStringProvider.java @@ -0,0 +1,58 @@ +package org.simdjson.testutils; + +import org.apache.commons.text.StringEscapeUtils; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.util.stream.IntStream; +import java.util.stream.Stream; + +class RandomStringProvider implements ArgumentsProvider, AnnotationConsumer { + + private int count; + private int minChars; + private int maxChars; + + @Override + public void accept(RandomStringSource randomStringSource) { + count = randomStringSource.count(); + if (count <= 0) { + throw new IllegalArgumentException("count has to be greater than zero"); + } + minChars = randomStringSource.minChars(); + if (minChars <= 0) { + throw new IllegalArgumentException("minChars has to be greater than zero"); + } + maxChars = randomStringSource.maxChars(); + if (maxChars <= 0 || maxChars == Integer.MAX_VALUE) { + throw new IllegalArgumentException("maxChars has to be withing the range of [1, Integer.MAX_VALUE - 1]"); + } + if (maxChars < minChars) { + throw new IllegalArgumentException("maxChars has to be greater or equal to minChars"); + } + } + + @Override + public Stream provideArguments(ExtensionContext context) { + Class[] parameterTypes = context.getRequiredTestMethod().getParameterTypes(); + if (parameterTypes.length != 2) { + throw new IllegalArgumentException("Test method should have two arguments: an input string and an expected value."); + } + if (parameterTypes[0] != String.class) { + throw new IllegalArgumentException("The first argument must be a String."); + 
} + if (parameterTypes[1] != String.class && parameterTypes[1] != Character.class && parameterTypes[1] != char.class) { + throw new IllegalArgumentException("The second argument must be either a String, Character, or char."); + } + return IntStream.range(0, count) + .mapToObj(i -> { + String jsonStr = StringTestData.randomString(minChars, maxChars); + if (parameterTypes[1] == String.class) { + return Arguments.of(jsonStr, StringEscapeUtils.unescapeJson(jsonStr)); + } + return Arguments.of(jsonStr, StringEscapeUtils.unescapeJson(jsonStr).charAt(0)); + }); + } +} diff --git a/src/test/java/org/simdjson/testutils/RandomStringSource.java b/src/test/java/org/simdjson/testutils/RandomStringSource.java new file mode 100644 index 0000000..a7aecb3 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomStringSource.java @@ -0,0 +1,22 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(RandomStringProvider.class) +public @interface RandomStringSource { + + int count() default 10; + + int minChars() default 1; + + int maxChars() default 100; +} diff --git a/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java new file mode 100644 index 0000000..2076379 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java @@ -0,0 +1,232 @@ +package org.simdjson.testutils; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.extension.ExtensionContext; +import 
org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.ArgumentsProvider;
+import org.junit.jupiter.params.support.AnnotationConsumer;
+
+import java.lang.reflect.Array;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Parameter;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Stream;
+
+/**
+ * JUnit arguments provider backing {@link SchemaBasedRandomValueSource}. For every configured schema class it
+ * generates a random value of that type together with the JSON text representing that value.
+ */
+class SchemaBasedRandomValueProvider implements ArgumentsProvider, AnnotationConsumer<SchemaBasedRandomValueSource> {
+
+    private static final Set<Class<?>> SUPPORTED_PRIMITIVE_TYPES = Set.of(
+            Boolean.class,
+            boolean.class,
+            String.class,
+            Character.class,
+            char.class,
+            Byte.class,
+            byte.class,
+            Short.class,
+            short.class,
+            Integer.class,
+            int.class,
+            Long.class,
+            long.class,
+            Float.class,
+            float.class,
+            Double.class,
+            double.class
+    );
+    private static final GeneratedElement NULL_ELEMENT = new GeneratedElement(null, "null");
+    private static final int MIN_ARRAY_ELEMENT = 1;
+    private static final int MAX_ARRAY_ELEMENT = 50;
+
+    private Class<?>[] schemas;
+    private boolean nulls;
+
+    /**
+     * Copies the configuration (schemas and the nulls flag) from the annotation placed on the test method.
+     */
+    @Override
+    public void accept(SchemaBasedRandomValueSource schemaBasedRandomValueSource) {
+        schemas = schemaBasedRandomValueSource.schemas();
+        nulls = schemaBasedRandomValueSource.nulls();
+    }
+
+    /**
+     * Produces one set of arguments per configured schema. Every parameter of the test method is filled
+     * according to its declared type: a {@code Class} parameter receives the schema (named after the class),
+     * a {@code String} parameter receives the generated JSON and any other parameter receives the generated value.
+     */
+    @Override
+    public Stream<? extends Arguments> provideArguments(ExtensionContext context) {
+        Class<?>[] parameterTypes = context.getRequiredTestMethod().getParameterTypes();
+        return Arrays.stream(schemas)
+                .map(schema -> {
+                    GeneratedElement expected = generate(schema, schema);
+                    Object[] args = new Object[parameterTypes.length];
+                    for (int i = 0; i < args.length; i++) {
+                        if (parameterTypes[i] == Class.class) {
+                            args[i] = Named.named(schema.getName(), schema);
+                        } else if (parameterTypes[i] == String.class) {
+                            args[i] = expected.string();
+                        } else {
+                            args[i] = expected.value();
+                        }
+                    }
+                    return () -> args;
+                });
+    }
+
+    /**
+     * Generates a random value of the given type together with its JSON representation.
+     * Supported primitives, arrays and lists are delegated to dedicated generators. Any other class is
+     * instantiated through its only declared constructor and every constructor parameter becomes a member
+     * of the generated JSON object, keyed by the parameter name.
+     *
+     * @param type the (possibly parameterized) type of the value; used to resolve list element types
+     * @param c    the raw class of the value
+     */
+    private GeneratedElement generate(Type type, Class<?> c) {
+        if (SUPPORTED_PRIMITIVE_TYPES.contains(c)) {
+            return generatePrimitive(type);
+        } else if (c.isArray()) {
+            return generateArray(c);
+        } else if (c == List.class) {
+            return generateList((ParameterizedType) type);
+        } else {
+            Constructor<?> constructor = resolveConstructor(c);
+            Parameter[] parameters = constructor.getParameters();
+            Object[] args = new Object[parameters.length];
+            StringBuilder jsonBuilder = new StringBuilder();
+            jsonBuilder.append('{');
+            for (int i = 0; i < args.length; i++) {
+                Parameter parameter = parameters[i];
+                GeneratedElement generatedElement = generate(parameter.getAnnotatedType().getType(), parameter.getType());
+                args[i] = generatedElement.value();
+                if (i > 0) {
+                    // Members of a JSON object have to be separated by commas.
+                    jsonBuilder.append(", ");
+                }
+                jsonBuilder.append('"');
+                jsonBuilder.append(parameters[i].getName());
+                jsonBuilder.append("\": ");
+                jsonBuilder.append(generatedElement.string());
+            }
+            jsonBuilder.append('}');
+            try {
+                Object o = constructor.newInstance(args);
+                return new GeneratedElement(o, jsonBuilder.toString());
+            } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    /**
+     * Generates an array of random length. When nulls are enabled every element is replaced by null on a coin
+     * flip, and the last element is forced to be null if no null has been emitted before it.
+     */
+    private GeneratedElement generateArray(Class<?> type) {
+        StringBuilder jsonStringBuilder = new StringBuilder();
+        Class<?> elementType = extractElementType(type);
+        int len = RandomUtils.nextInt(MIN_ARRAY_ELEMENT, MAX_ARRAY_ELEMENT + 1);
+        Object array = Array.newInstance(elementType, len);
+        jsonStringBuilder.append('[');
+        boolean arrayHasNullElement = false;
+        for (int i = 0; i < len; i++) {
+            boolean nullElement = nulls && ((!arrayHasNullElement && i == len - 1) || RandomUtils.nextBoolean());
+            GeneratedElement element;
+            if (nullElement) {
+                element = NULL_ELEMENT;
+            } else if (elementType.isArray()) {
+                element = generateArray(elementType);
+            } else {
+                element = generateArrayElement(elementType);
+            }
+            Array.set(array, i, element.value());
+            jsonStringBuilder.append(element.string());
+            arrayHasNullElement |= nullElement;
+            if (i != len - 1) {
+                jsonStringBuilder.append(',');
+            }
+        }
+        jsonStringBuilder.append(']');
+        return new GeneratedElement(array, jsonStringBuilder.toString());
+    }
+
+    /**
+     * Generates a list of random length whose element type is the first type argument of the given type.
+     * Nulls are placed according to the same rule as in {@link #generateArray(Class)}.
+     */
+    private GeneratedElement generateList(ParameterizedType type) {
+        StringBuilder jsonStringBuilder = new StringBuilder();
+        Type elementType = type.getActualTypeArguments()[0];
+        int len = RandomUtils.nextInt(MIN_ARRAY_ELEMENT, MAX_ARRAY_ELEMENT + 1);
+        List<Object> list = new ArrayList<>();
+        jsonStringBuilder.append('[');
+        boolean arrayHasNullElement = false;
+        for (int i = 0; i < len; i++) {
+            boolean nullElement = nulls && ((!arrayHasNullElement && i == len - 1) || RandomUtils.nextBoolean());
+            GeneratedElement element;
+            if (nullElement) {
+                element = NULL_ELEMENT;
+            } else if (elementType instanceof ParameterizedType parameterizedType) {
+                element = generate(elementType, (Class<?>) parameterizedType.getRawType());
+            } else {
+                element = generate(elementType, (Class<?>) elementType);
+            }
+            list.add(element.value());
+            jsonStringBuilder.append(element.string());
+            arrayHasNullElement |= nullElement;
+            if (i != len - 1) {
+                jsonStringBuilder.append(',');
+            }
+        }
+        jsonStringBuilder.append(']');
+        return new GeneratedElement(list, jsonStringBuilder.toString());
+    }
+
+    /**
+     * Returns the component type of an array class, or the class itself if it is not an array.
+     */
+    private static Class<?> extractElementType(Class<?> c) {
+        Class<?> elementType = c.componentType();
+        if (elementType == null) {
+            return c;
+        }
+        return elementType;
+    }
+
+    /**
+     * Generates a single non-array element of an array.
+     */
+    private GeneratedElement generateArrayElement(Class<?> elementType) {
+        if (SUPPORTED_PRIMITIVE_TYPES.contains(elementType)) {
+            return generatePrimitive(elementType);
+        }
+        return generate(elementType, elementType);
+    }
+
+    /**
+     * Returns the only declared constructor of the class, made accessible.
+     *
+     * @throws IllegalArgumentException if the class does not declare exactly one constructor
+     */
+    private Constructor<?> resolveConstructor(Class<?> expectedClass) {
+        Constructor<?>[] constructors = expectedClass.getDeclaredConstructors();
+        if (constructors.length == 1) {
+            Constructor<?> constructor = constructors[0];
+            constructor.setAccessible(true);
+            return constructor;
+        }
+        throw new IllegalArgumentException("Unsupported class: " + expectedClass + ". It should has only one constructor.");
+    }
+
+    /**
+     * Generates a random value of one of the supported primitive types (including boxed types and String).
+     * For strings and characters the expected value is the unescaped form of the generated JSON text.
+     *
+     * @throws UnsupportedOperationException if the type is not supported
+     */
+    private GeneratedElement generatePrimitive(Type elementType) {
+        if (elementType == Boolean.class || elementType == boolean.class) {
+            boolean element = RandomUtils.nextBoolean();
+            return new GeneratedElement(element, Boolean.toString(element));
+        }
+        if (elementType == String.class) {
+            String element = StringTestData.randomString(1, 50);
+            return new GeneratedElement(StringEscapeUtils.unescapeJson(element), "\"" + element + "\"");
+        }
+        if (elementType == Character.class || elementType == char.class) {
+            String element = StringTestData.randomString(1, 1);
+            return new GeneratedElement(StringEscapeUtils.unescapeJson(element).charAt(0), "\"" + element + "\"");
+        }
+        if (elementType == Byte.class || elementType == byte.class) {
+            byte element = NumberTestData.randomByte();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        if (elementType == Short.class || elementType == short.class) {
+            short element = NumberTestData.randomShort();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        if (elementType == Integer.class || elementType == int.class) {
+            int element = NumberTestData.randomInt();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        if (elementType == Long.class || elementType == long.class) {
+            long element = NumberTestData.randomLong();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        if (elementType == Float.class || elementType == float.class) {
+            float element = NumberTestData.randomFloat();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        if (elementType == Double.class || elementType == double.class) {
+            double element = NumberTestData.randomDouble();
+            return new GeneratedElement(element, String.valueOf(element));
+        }
+        throw new UnsupportedOperationException("Unsupported type: " + elementType + ". The following classes are supported: " + SUPPORTED_PRIMITIVE_TYPES);
+    }
+
+    /**
+     * A generated value paired with the JSON text that represents it.
+     */
+    private record GeneratedElement(Object value, String string) {
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java
new file mode 100644
index 0000000..fc797e9
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java
@@ -0,0 +1,30 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.params.provider.ArgumentsSource;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Supplies a parameterized test with randomly generated values, one set of arguments per schema class.
+ * The arguments are produced by {@link SchemaBasedRandomValueProvider}.
+ */
+@Documented
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@ArgumentsSource(SchemaBasedRandomValueProvider.class)
+public @interface SchemaBasedRandomValueSource {
+
+    /**
+     * Classes for which random instances, and the JSON text representing them, are generated.
+     */
+    Class<?>[] schemas();
+
+    /**
+     * If set to true at least one null will appear in every generated array or list.
+     */
+    boolean nulls();
+}
diff --git a/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java b/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java
new file mode 100644
index 0000000..e0f114b
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java
@@ -0,0 +1,17 @@
+package org.simdjson.testutils;
+
+import org.assertj.core.api.Assertions;
+import org.simdjson.JsonValue;
+
+/**
+ * Entry point for assertions: AssertJ's {@link Assertions} extended with an overload for {@link JsonValue}.
+ */
+public class SimdJsonAssertions extends Assertions {
+
+    /**
+     * Creates a {@link JsonValueAssert} for the given value.
+     */
+    public static JsonValueAssert assertThat(JsonValue actual) {
+        return new JsonValueAssert(actual);
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/StringTestData.java b/src/test/java/org/simdjson/testutils/StringTestData.java
new file mode 100644
index 0000000..2389a91
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/StringTestData.java
@@ -0,0 +1,156 @@
+package org.simdjson.testutils;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.commons.text.StringEscapeUtils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+
+import static java.lang.Character.MAX_CODE_POINT;
+import static java.lang.Character.isBmpCodePoint;
+import static java.lang.Character.lowSurrogate;
+import static java.util.stream.IntStream.rangeClosed;
+
+/**
+ * Generators of string test data: random JSON-escaped strings and exhaustive lists of characters and escapes.
+ */
+public class StringTestData {
+
+    // Maps every control character (U+0000..U+001F) to its unicode escape sequence.
+    private static final Map<String, String> CONTROL_CHARACTER_ESCAPE = new HashMap<>();
+
+    static {
+        for (int codePoint = 0; codePoint <= 0x001F; codePoint++) {
+            String controlCharacter = String.valueOf((char) codePoint);
+            CONTROL_CHARACTER_ESCAPE.put(controlCharacter, toUnicodeEscape(codePoint));
+        }
+    }
+
+    /**
+     * Returns a random string in its JSON-escaped form (without the surrounding quotes): backslashes, double
+     * quotes and control characters are escaped.
+     *
+     * @param minChars minimum length of the random string before escaping
+     * @param maxChars maximum length of the random string before escaping
+     */
+    public static String randomString(int minChars, int maxChars) {
+        int stringLen = RandomUtils.nextInt(minChars, maxChars + 1);
+        // Backslashes have to be escaped first. Otherwise the backslash introduced by escaping a double quote
+        // would be doubled as well, leaving the quote itself unescaped.
+        var string = RandomStringUtils.random(stringLen)
+                .replaceAll("\\\\", "\\\\\\\\")
+                .replaceAll("\"", "\\\\\"");
+        for (Map.Entry<String, String> entry : CONTROL_CHARACTER_ESCAPE.entrySet()) {
+            string = string.replaceAll(entry.getKey(), Matcher.quoteReplacement(entry.getValue()));
+        }
+        System.out.println("Generated string: " + string + " [" + StringEscapeUtils.escapeJava(string) + "]");
+        return string;
+    }
+
+    /**
+     * Returns all usable characters that don't need to be escaped.
+     * It means that all control characters, '"', and '\' are not returned.
+     */
+    public static List<String> usableSingleCodeUnitCharacters() {
+        return rangeClosed(0, MAX_CODE_POINT)
+                .filter(Character::isBmpCodePoint)
+                .filter(codePoint -> !isReservedCodePoint(codePoint))
+                .filter(codePoint -> !Character.isISOControl(codePoint))
+                .filter(codePoint -> (char) codePoint != '"')
+                .filter(codePoint -> (char) codePoint != '\\')
+                .mapToObj(codePoint -> (char) codePoint)
+                .map(String::valueOf)
+                .toList();
+    }
+
+    /**
+     * Returns the unicode escapes of all BMP code points except surrogates.
+     */
+    public static List<String> usableEscapedSingleCodeUnitCharacters() {
+        return rangeClosed(0, MAX_CODE_POINT)
+                .filter(Character::isBmpCodePoint)
+                .filter(codePoint -> !isReservedCodePoint(codePoint))
+                .mapToObj(StringTestData::toUnicodeEscape)
+                .toList();
+    }
+
+    /**
+     * Returns the unicode escapes of all surrogate code units (U+D800..U+DFFF).
+     */
+    public static List<String> reservedEscapedSingleCodeUnitCharacters() {
+        return rangeClosed(0, MAX_CODE_POINT)
+                .filter(Character::isBmpCodePoint)
+                .filter(StringTestData::isReservedCodePoint)
+                .mapToObj(StringTestData::toUnicodeEscape)
+                .toList();
+    }
+
+    /**
+     * Returns the unicode escapes of all low surrogates (U+DC00..U+DFFF).
+     */
+    public static List<String> escapedLowSurrogates() {
+        return rangeClosed(0xDC00, 0xDFFF)
+                .mapToObj(StringTestData::toUnicodeEscape)
+                .toList();
+    }
+
+    /**
+     * Returns all characters outside the BMP, i.e. those that take two UTF-16 code units.
+     */
+    public static List<String> usableTwoCodeUnitsCharacters() {
+        return rangeClosed(0, MAX_CODE_POINT)
+                .filter(codePoint -> !Character.isBmpCodePoint(codePoint))
+                .mapToObj(Character::toString)
+                .toList();
+    }
+
+    /**
+     * Returns the unicode escapes of all code points except surrogates. Code points outside the BMP are
+     * rendered as an escaped surrogate pair.
+     */
+    public static List<String> usableEscapedUnicodeCharacters() {
+        return rangeClosed(0, MAX_CODE_POINT)
+                .filter(codePoint -> !isReservedCodePoint(codePoint))
+                .mapToObj(StringTestData::toUnicodeEscape)
+                .toList();
+    }
+
+    /**
+     * Returns escaped pairs made of the high surrogate U+D800 followed by every 16-bit value that is not
+     * a low surrogate.
+     */
+    public static List<String> escapedUnicodeCharactersWithInvalidLowSurrogate() {
+        return rangeClosed(0x0000, 0xFFFF)
+                .filter(lowSurrogate -> lowSurrogate < 0xDC00 || lowSurrogate > 0xDFFF)
+                .mapToObj(lowSurrogate -> String.format("\\uD800\\u%04X", lowSurrogate))
+                .toList();
+    }
+
+    /**
+     * Returns all control characters (U+0000..U+001F) in their raw, unescaped form.
+     */
+    public static List<String> unescapedControlCharacters() {
+        return rangeClosed(0, 0x001F)
+                .mapToObj(codePoint -> (char) codePoint)
+                .map(String::valueOf)
+                .toList();
+    }
+
+    // Formats a code point as a unicode escape; code points outside the BMP become an escaped surrogate pair.
+    private static String toUnicodeEscape(int codePoint) {
+        if (isBmpCodePoint(codePoint)) {
+            return String.format("\\u%04X", codePoint);
+        } else {
+            return String.format("\\u%04X\\u%04X",
+                    (int) Character.highSurrogate(codePoint), (int) lowSurrogate(codePoint));
+        }
+    }
+
+    // Surrogate code units (U+D800..U+DFFF) are treated as reserved by the generators above.
+    private static boolean isReservedCodePoint(int codePoint) {
+        return codePoint >= 0xD800 && codePoint <= 0xDFFF;
+    }
+}