Skip to content

Commit

Permalink
fix comments, rename SimdjsonParser2 to SimdjsonParserWithFixPath
Browse files Browse the repository at this point in the history
  • Loading branch information
jimeng committed Oct 22, 2024
1 parent d0c4330 commit f6fc9e5
Show file tree
Hide file tree
Showing 6 changed files with 322 additions and 5 deletions.
11 changes: 10 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ group = 'org.simdjson'
version = scmVersion.version

repositories {
mavenLocal()
mavenCentral()
}

Expand All @@ -45,6 +46,7 @@ java {
ext {
junitVersion = '5.10.2'
jsoniterScalaVersion = '2.28.4'
lombokVersion = '1.18.34'
}

dependencies {
Expand All @@ -53,13 +55,18 @@ dependencies {
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion

testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.2.0'
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion

}

tasks.register('downloadTestData') {
Expand Down Expand Up @@ -160,7 +167,9 @@ publishing {
publications {
mavenJava(MavenPublication) {
from(components.java)

groupId = 'org.simdjson'
artifactId = 'simdjson-java'
version = scmVersion.version
pom {
name = project.name
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'
Expand Down
49 changes: 49 additions & 0 deletions src/jmh/java/org/simdjson/ParseAndSelectFixPathBenchMark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.simdjson;

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.*;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * JMH throughput benchmark that feeds the same JSON document to three parsers:
 * the regular {@link SimdJsonParser}, {@link SimdJsonParserWithFixPath} configured
 * with a fixed list of paths, and Jackson's {@link ObjectMapper}.
 *
 * <p>NOTE(review): the SimdJson and Jackson benchmarks below only build the parsed
 * document and do not read the ten configured paths from it, whereas the fixed-path
 * parser returns the extracted values. Confirm this is the comparison that is wanted.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
public class ParseAndSelectFixPathBenchMark {

    /** Classpath location of the JSON document to benchmark against. */
    @Param({"/twitter.json"})
    String fileName;

    /** Raw bytes of the benchmark document, loaded once per trial. */
    private byte[] buffer;

    private final SimdJsonParser simdJsonParser = new SimdJsonParser();
    private final ObjectMapper jacksonObjectMapper = new ObjectMapper();
    private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath(
            "statuses.0.user.default_profile", "statuses.0.user.screen_name",
            "statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description",
            "statuses.1.user.default_profile", "statuses.1.user.screen_name",
            "statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description");

    /**
     * Loads the document named by {@link #fileName} from the classpath.
     *
     * @throws IOException if the resource does not exist or cannot be read
     */
    @Setup(Level.Trial)
    public void setup() throws IOException {
        // Honour the @Param value instead of a hard-coded name, so adding further
        // files to the @Param list actually changes what is benchmarked.
        try (InputStream is = ParseAndSelectFixPathBenchMark.class.getResourceAsStream(fileName)) {
            if (is == null) {
                // getResourceAsStream signals a missing resource with null; fail with a clear message.
                throw new IOException("Benchmark resource not found: " + fileName);
            }
            buffer = is.readAllBytes();
        }
        System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES);
    }

    /** Parses the whole document with the regular simdjson parser. */
    @Benchmark
    public JsonValue parseMultiValuesForFixPaths_SimdJson() {
        return simdJsonParser.parse(buffer, buffer.length);
    }

    /** Extracts the configured paths with the fixed-path parser. */
    @Benchmark
    public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() {
        return simdJsonParserWithFixPath.parse(buffer, buffer.length);
    }

    /**
     * Parses the whole document into a Jackson tree.
     *
     * @throws IOException if Jackson fails to read the document
     */
    @Benchmark
    public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException {
        return jacksonObjectMapper.readTree(buffer);
    }
}
8 changes: 5 additions & 3 deletions src/main/java/org/simdjson/BitIndexes.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.simdjson;

class BitIndexes {
import java.util.Arrays;

public class BitIndexes {

private final int[] indexes;

Expand Down Expand Up @@ -44,8 +46,8 @@ private long clearLowestBit(long bits) {
return bits & (bits - 1);
}

void advance() {
readIdx++;
/**
 * Returns the entry of {@code indexes} at the current read position and then
 * moves the read position forward by one.
 *
 * @return the value stored at the read position before it was advanced
 */
int advance() {
return indexes[readIdx++];
}

int getAndAdvance() {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/org/simdjson/SimdJsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
paddedBuffer = new byte[capacity];
indexer = new StructuralIndexer(bitIndexes);
}

/**
 * Runs only the first parsing stage over the input and hands back the
 * parser's {@code bitIndexes} field.
 *
 * <p>The returned object is the parser's own instance, not a copy.
 *
 * @param buffer bytes of the JSON document
 * @param len    number of bytes of {@code buffer} to process
 * @return the {@code bitIndexes} instance held by this parser
 */
BitIndexes buildBitIndex(byte[] buffer, int len) {
    final byte[] paddedInput = padIfNeeded(buffer, len);
    reset();
    stage1(paddedInput, len);
    return bitIndexes;
}
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
byte[] padded = padIfNeeded(buffer, len);
reset();
Expand Down
219 changes: 219 additions & 0 deletions src/main/java/org/simdjson/SimdJsonParserWithFixPath.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package org.simdjson;

import java.util.HashMap;
import java.util.Map;

import lombok.Data;
import lombok.RequiredArgsConstructor;

public class SimdJsonParserWithFixPath {

@Data
@RequiredArgsConstructor
static class JsonNode {
private long version = 0;
private boolean isLeaf = false;
private final String name;
private String value = null;
private JsonNode parent = null;
private Map<String, JsonNode> children = new HashMap<>();
private int start = -1;
private int end = -1;
}

private final SimdJsonParser parser;
private BitIndexes bitIndexes;
private final JsonNode root = new JsonNode(null);
private final JsonNode[] row;
private final String[] result;
private final String[] emptyResult;
private JsonNode ptr;
private byte[] buffer;
private final int expectParseCols;
// every time json string is processed, currentVersion will be incremented by 1
private long currentVersion = 0;

public SimdJsonParserWithFixPath(String... args) {
parser = new SimdJsonParser();
expectParseCols = args.length;
row = new JsonNode[expectParseCols];
result = new String[expectParseCols];
emptyResult = new String[expectParseCols];
for (int i = 0; i < args.length; i++) {
emptyResult[i] = null;
}
for (int i = 0; i < expectParseCols; i++) {
JsonNode cur = root;
String[] paths = args[i].split("\\.");
for (int j = 0; j < paths.length; j++) {
if (!cur.getChildren().containsKey(paths[j])) {
JsonNode child = new JsonNode(paths[j]);
cur.getChildren().put(paths[j], child);
child.setParent(cur);
}
cur = cur.getChildren().get(paths[j]);
}
cur.setLeaf(true);
row[i] = cur;
}

}

public String[] parse(byte[] buffer, int len) {
this.bitIndexes = parser.buildBitIndex(buffer, len);
if (buffer == null || buffer.length == 0) {
return emptyResult;
}
this.currentVersion++;
this.ptr = root;
this.buffer = buffer;

switch (buffer[bitIndexes.peek()]) {
case '{' -> {
parseMap();
}
case '[' -> {
parseList();
}
default -> {
throw new RuntimeException("invalid json format");
}
}
return getResult();
}

private String parseValue() {
int start = bitIndexes.advance();
int next = bitIndexes.peek();
String field = new String(buffer, start, next - start).trim();
if ("null".equalsIgnoreCase(field)) {
return null;
}
// field type is string or type is decimal
if (field.startsWith("\"")) {
field = field.substring(1, field.length() - 1);
}
return field;
}

private void parseElement(String expectFieldName) {
// if expectFieldName is null, parent is map, else is list
if (expectFieldName == null) {
expectFieldName = parseValue();
bitIndexes.advance(); // skip :
}
if (!ptr.getChildren().containsKey(expectFieldName)) {
skip(false);
return;
}
ptr = ptr.getChildren().get(expectFieldName);
switch (buffer[bitIndexes.peek()]) {
case '{' -> {
parseMap();
}
case '[' -> {
parseList();
}
default -> {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
}
}
ptr = ptr.getParent();
}

private void parseMap() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') {
parseElement(null);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
}
}
ptr.setEnd(bitIndexes.peek());
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
}
bitIndexes.advance();
}

private void parseList() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
int i = 0;
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') {
parseElement("" + i);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
}
i++;
}
ptr.setEnd(bitIndexes.peek());
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
}
bitIndexes.advance();
}

private String skip(boolean retainValue) {
int i = 0;
int start = retainValue ? bitIndexes.peek() : 0;
switch (buffer[bitIndexes.peek()]) {
case '{' -> {
i++;
while (i > 0) {
bitIndexes.advance();
if (buffer[bitIndexes.peek()] == '{') {
i++;
} else if (buffer[bitIndexes.peek()] == '}') {
i--;
}
}
int end = bitIndexes.peek();
bitIndexes.advance();
return retainValue ? new String(buffer, start, end - start + 1) : null;
}
case '[' -> {
i++;
while (i > 0) {
bitIndexes.advance();
if (buffer[bitIndexes.peek()] == '[') {
i++;
} else if (buffer[bitIndexes.peek()] == ']') {
i--;
}
}
int end = bitIndexes.peek();
bitIndexes.advance();
return retainValue ? new String(buffer, start, end - start + 1) : null;
}
default -> {
return parseValue();
}
}
}

private String[] getResult() {
for (int i = 0; i < expectParseCols; i++) {
if (row[i].getVersion() < currentVersion) {
result[i] = null;
continue;
}
result[i] = row[i].getValue();
}
return result;
}
}
33 changes: 33 additions & 0 deletions src/test/java/org/simdjson/JsonMultiValueParsingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.simdjson;

import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
import static org.simdjson.testutils.TestUtils.toUtf8;

import org.junit.jupiter.api.Test;

/**
 * Checks that {@link SimdJsonParserWithFixPath} returns the expected text for each
 * configured path, in the order the paths were given.
 */
public class JsonMultiValueParsingTest {

    /** Nested object fields, a whole array, a single array element and a JSON null. */
    @Test
    public void testParseMultiValue() {
        // given
        byte[] document = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");
        SimdJsonParserWithFixPath fixPathParser =
                new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5");
        String[] expected = {"value2", "3", "[\"value4\",\"value5\"]", "value4", null};

        // when
        String[] actual = fixPathParser.parse(document, document.length);

        // then
        for (int column = 0; column < expected.length; column++) {
            assertThat(actual[column]).isEqualTo(expected[column]);
        }
    }

    /** Keys made of non-ASCII characters and of escape sequences are matched by their raw text. */
    @Test
    public void testNonAsciiCharacters() {
        // given
        byte[] document = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
        SimdJsonParserWithFixPath fixPathParser =
                new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
        String[] expected = {"1", "2", "3", "4"};

        // when
        String[] actual = fixPathParser.parse(document, document.length);

        // then
        for (int column = 0; column < expected.length; column++) {
            assertThat(actual[column]).isEqualTo(expected[column]);
        }
    }
}

0 comments on commit f6fc9e5

Please sign in to comment.