diff --git a/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java b/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java index b95a023..2ee2bb9 100644 --- a/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java +++ b/src/main/java/com/exacaster/deltafetch/search/parquet/readsupport/MapConverter.java @@ -1,5 +1,7 @@ package com.exacaster.deltafetch.search.parquet.readsupport; +import java.math.BigDecimal; +import java.math.BigInteger; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.Converter; import org.apache.parquet.io.api.GroupConverter; @@ -51,7 +53,7 @@ public Optional visit( @Override public Optional visit( LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return of(new SimplePrimitiveConverter(field.getName())); + return of(new DecimalConverter(field.getName(), decimalLogicalType.getScale())); } }).orElse(new SimplePrimitiveConverter(field.getName())); } @@ -149,4 +151,28 @@ public void addBinary(Binary value) { array.add(value.toStringUsingUTF8()); } } + + private class DecimalConverter extends PrimitiveConverter { + private final String name; + private final int scale; + + public DecimalConverter(String name, int scale) { + this.name = name; + this.scale = scale; + } + + @Override + public void addBinary(Binary value) { + BigInteger unscaledValue = new BigInteger(value.getBytes()); + BigDecimal decimalValue = new BigDecimal(unscaledValue, scale); + record.put(name, decimalValue); + } + + @Override + public void addLong(long value) { + BigInteger unscaledValue = BigInteger.valueOf(value); + BigDecimal decimalValue = new BigDecimal(unscaledValue, scale); + record.put(name, decimalValue); + } + } } \ No newline at end of file diff --git a/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy b/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy index 7897e12..67abd82 100644 --- a/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy +++ b/src/test/groovy/com/exacaster/deltafetch/search/SearchServiceTest.groovy @@ -30,4 +30,28 @@ class SearchServiceTest extends Specification { then: "returns empty" result.isEmpty() } + def worksWithDifferentTypes() { + given: + def cache = Mock(SyncCache) { + get(*_) >> Optional.empty() + } + def conf = new Configuration() + def statsReader = new DeltaMetaReader(conf, cache) + def svc = new SearchService(statsReader, new Configuration()) + def path = getClass().getResource("/test_data_types").toString() + + when: + def result = svc.find(path, [new ColumnValueFilter("user_id", "555")], true, 1).findFirst() + + then: + result.isPresent() + result.get().getValue().get("trait_string") == "ACTIVE" + result.get().getValue().get("trait_decimal18_3") == 5.555 + result.get().getValue().get("trait_decimal21_3") == 55.123 + result.get().getValue().get("trait_double") == 12345.678 + result.get().getValue().get("trait_float") == 12345.678F + result.get().getValue().get("trait_int") == 12345 + result.get().getValue().get("trait_bigint") == 123456789012345 + result.get().getValue().get("trait_boolean") == true + } } diff --git a/src/test/resources/test_data_types/_delta_log/00000000000000000000.json b/src/test/resources/test_data_types/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..beffb30 --- /dev/null +++ b/src/test/resources/test_data_types/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1721901554498,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2873"},"engineInfo":"Apache-Spark/3.4.2 Delta-Lake/2.4.0","txnId":"b5285209-e1ef-4844-8c25-9353a33e2b75"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"17da5576-7e97-4e09-a01d-74f679b262e8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"user_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_decimal18_3\",\"type\":\"decimal(18,3)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_decimal21_3\",\"type\":\"decimal(21,3)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_bigint\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"trait_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1721901552277}} +{"add":{"path":"part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet","partitionValues":{},"size":2873,"modificationTime":1721901554000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"user_id\":\"111\",\"trait_string\":\"ACTIVE\",\"trait_decimal18_3\":1.111,\"trait_decimal21_3\":11.123,\"trait_double\":3456.78,\"trait_float\":12345.678,\"trait_int\":12345,\"trait_bigint\":123456789012345},\"maxValues\":{\"user_id\":\"555\",\"trait_string\":\"DEACTIVATED\",\"trait_decimal18_3\":5.555,\"trait_decimal21_3\":55.123,\"trait_double\":12345.678,\"trait_float\":98765.43,\"trait_int\":98765,\"trait_bigint\":987654321098765},\"nullCount\":{\"user_id\":0,\"trait_string\":0,\"trait_decimal18_3\":0,\"trait_decimal21_3\":0,\"trait_double\":0,\"trait_float\":0,\"trait_int\":0,\"trait_bigint\":0,\"trait_boolean\":0}}"}} diff --git a/src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet b/src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet new file mode 100644 index 0000000..54d4725 Binary files /dev/null and b/src/test/resources/test_data_types/part-00000-d31af104-2315-4f5a-9a24-0e41fa1c58f2-c000.snappy.parquet differ