-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix comments, rename SimdjsonParser2 to SimdjsonParserWithFixPath
- Loading branch information
jimeng
committed
Oct 22, 2024
1 parent
d0c4330
commit f6fc9e5
Showing
6 changed files
with
322 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
49 changes: 49 additions & 0 deletions
49
src/jmh/java/org/simdjson/ParseAndSelectFixPathBenchMark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package org.simdjson; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import org.openjdk.jmh.annotations.*; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
||
@State(Scope.Benchmark) | ||
@BenchmarkMode(Mode.Throughput) | ||
@OutputTimeUnit(TimeUnit.SECONDS) | ||
public class ParseAndSelectFixPathBenchMark { | ||
@Param({"/twitter.json"}) | ||
String fileName; | ||
private byte[] buffer; | ||
private final SimdJsonParser simdJsonParser = new SimdJsonParser(); | ||
private final ObjectMapper jacksonObjectMapper = new ObjectMapper(); | ||
private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath( | ||
"statuses.0.user.default_profile", "statuses.0.user.screen_name", | ||
"statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description", | ||
"statuses.1.user.default_profile", "statuses.1.user.screen_name", | ||
"statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description"); | ||
|
||
@Setup(Level.Trial) | ||
public void setup() throws IOException { | ||
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { | ||
buffer = is.readAllBytes(); | ||
} | ||
System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES); | ||
} | ||
|
||
@Benchmark | ||
public JsonValue parseMultiValuesForFixPaths_SimdJson() { | ||
return simdJsonParser.parse(buffer, buffer.length); | ||
} | ||
|
||
@Benchmark | ||
public String[] parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() { | ||
return simdJsonParserWithFixPath.parse(buffer, buffer.length); | ||
} | ||
|
||
@Benchmark | ||
public JsonNode parseMultiValuesForFixPaths_Jackson() throws IOException { | ||
return jacksonObjectMapper.readTree(buffer); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
219 changes: 219 additions & 0 deletions
219
src/main/java/org/simdjson/SimdJsonParserWithFixPath.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
package org.simdjson; | ||
|
||
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
|
||
public class SimdJsonParserWithFixPath { | ||
|
||
@Data | ||
@RequiredArgsConstructor | ||
static class JsonNode { | ||
private long version = 0; | ||
private boolean isLeaf = false; | ||
private final String name; | ||
private String value = null; | ||
private JsonNode parent = null; | ||
private Map<String, JsonNode> children = new HashMap<>(); | ||
private int start = -1; | ||
private int end = -1; | ||
} | ||
|
||
private final SimdJsonParser parser; | ||
private BitIndexes bitIndexes; | ||
private final JsonNode root = new JsonNode(null); | ||
private final JsonNode[] row; | ||
private final String[] result; | ||
private final String[] emptyResult; | ||
private JsonNode ptr; | ||
private byte[] buffer; | ||
private final int expectParseCols; | ||
// every time json string is processed, currentVersion will be incremented by 1 | ||
private long currentVersion = 0; | ||
|
||
public SimdJsonParserWithFixPath(String... args) { | ||
parser = new SimdJsonParser(); | ||
expectParseCols = args.length; | ||
row = new JsonNode[expectParseCols]; | ||
result = new String[expectParseCols]; | ||
emptyResult = new String[expectParseCols]; | ||
for (int i = 0; i < args.length; i++) { | ||
emptyResult[i] = null; | ||
} | ||
for (int i = 0; i < expectParseCols; i++) { | ||
JsonNode cur = root; | ||
String[] paths = args[i].split("\\."); | ||
for (int j = 0; j < paths.length; j++) { | ||
if (!cur.getChildren().containsKey(paths[j])) { | ||
JsonNode child = new JsonNode(paths[j]); | ||
cur.getChildren().put(paths[j], child); | ||
child.setParent(cur); | ||
} | ||
cur = cur.getChildren().get(paths[j]); | ||
} | ||
cur.setLeaf(true); | ||
row[i] = cur; | ||
} | ||
|
||
} | ||
|
||
public String[] parse(byte[] buffer, int len) { | ||
this.bitIndexes = parser.buildBitIndex(buffer, len); | ||
if (buffer == null || buffer.length == 0) { | ||
return emptyResult; | ||
} | ||
this.currentVersion++; | ||
this.ptr = root; | ||
this.buffer = buffer; | ||
|
||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
parseMap(); | ||
} | ||
case '[' -> { | ||
parseList(); | ||
} | ||
default -> { | ||
throw new RuntimeException("invalid json format"); | ||
} | ||
} | ||
return getResult(); | ||
} | ||
|
||
private String parseValue() { | ||
int start = bitIndexes.advance(); | ||
int next = bitIndexes.peek(); | ||
String field = new String(buffer, start, next - start).trim(); | ||
if ("null".equalsIgnoreCase(field)) { | ||
return null; | ||
} | ||
// field type is string or type is decimal | ||
if (field.startsWith("\"")) { | ||
field = field.substring(1, field.length() - 1); | ||
} | ||
return field; | ||
} | ||
|
||
private void parseElement(String expectFieldName) { | ||
// if expectFieldName is null, parent is map, else is list | ||
if (expectFieldName == null) { | ||
expectFieldName = parseValue(); | ||
bitIndexes.advance(); // skip : | ||
} | ||
if (!ptr.getChildren().containsKey(expectFieldName)) { | ||
skip(false); | ||
return; | ||
} | ||
ptr = ptr.getChildren().get(expectFieldName); | ||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
parseMap(); | ||
} | ||
case '[' -> { | ||
parseList(); | ||
} | ||
default -> { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
} | ||
ptr = ptr.getParent(); | ||
} | ||
|
||
private void parseMap() { | ||
if (ptr.getChildren() == null) { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
return; | ||
} | ||
ptr.setStart(bitIndexes.peek()); | ||
bitIndexes.advance(); | ||
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') { | ||
parseElement(null); | ||
if (buffer[bitIndexes.peek()] == ',') { | ||
bitIndexes.advance(); | ||
} | ||
} | ||
ptr.setEnd(bitIndexes.peek()); | ||
if (ptr.isLeaf()) { | ||
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
bitIndexes.advance(); | ||
} | ||
|
||
private void parseList() { | ||
if (ptr.getChildren() == null) { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
return; | ||
} | ||
ptr.setStart(bitIndexes.peek()); | ||
bitIndexes.advance(); | ||
int i = 0; | ||
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') { | ||
parseElement("" + i); | ||
if (buffer[bitIndexes.peek()] == ',') { | ||
bitIndexes.advance(); | ||
} | ||
i++; | ||
} | ||
ptr.setEnd(bitIndexes.peek()); | ||
if (ptr.isLeaf()) { | ||
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
bitIndexes.advance(); | ||
} | ||
|
||
private String skip(boolean retainValue) { | ||
int i = 0; | ||
int start = retainValue ? bitIndexes.peek() : 0; | ||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
i++; | ||
while (i > 0) { | ||
bitIndexes.advance(); | ||
if (buffer[bitIndexes.peek()] == '{') { | ||
i++; | ||
} else if (buffer[bitIndexes.peek()] == '}') { | ||
i--; | ||
} | ||
} | ||
int end = bitIndexes.peek(); | ||
bitIndexes.advance(); | ||
return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
} | ||
case '[' -> { | ||
i++; | ||
while (i > 0) { | ||
bitIndexes.advance(); | ||
if (buffer[bitIndexes.peek()] == '[') { | ||
i++; | ||
} else if (buffer[bitIndexes.peek()] == ']') { | ||
i--; | ||
} | ||
} | ||
int end = bitIndexes.peek(); | ||
bitIndexes.advance(); | ||
return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
} | ||
default -> { | ||
return parseValue(); | ||
} | ||
} | ||
} | ||
|
||
private String[] getResult() { | ||
for (int i = 0; i < expectParseCols; i++) { | ||
if (row[i].getVersion() < currentVersion) { | ||
result[i] = null; | ||
continue; | ||
} | ||
result[i] = row[i].getValue(); | ||
} | ||
return result; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.simdjson; | ||
|
||
import static org.simdjson.testutils.SimdJsonAssertions.assertThat; | ||
import static org.simdjson.testutils.TestUtils.toUtf8; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
public class JsonMultiValueParsingTest { | ||
@Test | ||
public void testParseMultiValue() { | ||
byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}"); | ||
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5"); | ||
String[] result = parser.parse(json, json.length); | ||
assertThat(result[0]).isEqualTo("value2"); | ||
assertThat(result[1]).isEqualTo("3"); | ||
assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]"); | ||
assertThat(result[3]).isEqualTo("value4"); | ||
assertThat(result[4]).isEqualTo(null); | ||
} | ||
|
||
@Test | ||
public void testNonAsciiCharacters() { | ||
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}"); | ||
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀"); | ||
// when | ||
String[] result = parser.parse(json, json.length); | ||
// then | ||
assertThat(result[0]).isEqualTo("1"); | ||
assertThat(result[1]).isEqualTo("2"); | ||
assertThat(result[2]).isEqualTo("3"); | ||
assertThat(result[3]).isEqualTo("4"); | ||
} | ||
} |