-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/main' into release/v0.7.0
- Loading branch information
Showing
7 changed files
with
1,182 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# | ||
# Copyright (c) 2016 - 2021 Deephaven Data Labs and Patent Pending | ||
# | ||
""" | ||
Utilities for gathering Deephaven table data into Python objects | ||
""" | ||
|
||
import numpy as np | ||
import enum | ||
import jpy | ||
import wrapt | ||
|
||
# None until the first _defineSymbols() call | ||
_gatherer = None | ||
|
||
def _defineSymbols(): | ||
if not jpy.has_jvm(): | ||
raise SystemError("No java functionality can be used until the JVM has been initialized through the jpy module") | ||
|
||
global _gatherer | ||
global Layout | ||
|
||
if _gatherer is None: | ||
_gatherer = jpy.get_type("io.deephaven.integrations.learn.gather.NumPy") | ||
|
||
class MemoryLayout(enum.Enum): | ||
""" | ||
Memory layouts for an array. | ||
""" | ||
ROW_MAJOR = True | ||
"""Row-major memory layout.""" | ||
COLUMN_MAJOR = False | ||
"""Column-major memory layout.""" | ||
C = True | ||
"""Memory layout consistent with C arrays (row-major).""" | ||
FORTRAN = False | ||
"""Memory layout consistent with Fortran arrays (column-major).""" | ||
|
||
def __init__(self, is_row_major): | ||
self.is_row_major = is_row_major | ||
|
||
|
||
# Every method that depends on symbols defined via _defineSymbols() should be decorated with @_passThrough | ||
@wrapt.decorator | ||
def _passThrough(wrapped, instance, args, kwargs): | ||
""" | ||
For decoration of module methods, to define necessary symbols at runtime | ||
:param wrapped: the method to be decorated | ||
:param instance: the object to which the wrapped function was bound when it was called | ||
:param args: the argument list for `wrapped` | ||
:param kwargs: the keyword argument dictionary for `wrapped` | ||
:return: the decorated version of the method | ||
""" | ||
|
||
_defineSymbols() | ||
return wrapped(*args, **kwargs) | ||
|
||
try: | ||
_defineSymbols() | ||
except Exception as e: | ||
pass | ||
|
||
@_passThrough | ||
def convert_to_numpy_dtype(dtype): | ||
""" | ||
Convert an input type to the corresponding NumPy data type | ||
:param dtype: A Python type | ||
""" | ||
if dtype.__module__ == np.__name__: | ||
return dtype | ||
elif dtype == bool: | ||
dtype = np.bool_ | ||
elif dtype == float: | ||
dtype = np.double | ||
elif dtype == int: | ||
dtype = np.intc | ||
else: | ||
raise ValueError(f"{dtype} is not a data type that can be converted to a NumPy dtype.") | ||
return dtype | ||
|
||
@_passThrough | ||
def table_to_numpy_2d(row_set, col_set, order:MemoryLayout = MemoryLayout.ROW_MAJOR, dtype:np.dtype = np.intc): | ||
""" | ||
Convert Deephaven table data to a 2d NumPy array of the appropriate size | ||
:param row_set: A RowSequence describing the number of rows in the table | ||
:param col_set: ColumnSources describing which columns to copy | ||
:param order: :param order: The desired memory layout of the output array | ||
:param dtype: The desired NumPy data type of the output NumPy array | ||
:return: A NumPy ndarray | ||
""" | ||
|
||
if not(isinstance(order, MemoryLayout)): | ||
raise ValueError(f"Invalid major order {order}. Please use an enum value from MemoryLayout.") | ||
|
||
dtype = convert_to_numpy_dtype(dtype) | ||
|
||
if dtype == np.byte: | ||
buffer = _gatherer.tensorBuffer2DByte(row_set, col_set, order.is_row_major) | ||
elif dtype == np.short: | ||
buffer = _gatherer.tensorBuffer2DShort(row_set, col_set, order.is_row_major) | ||
elif dtype == np.intc: | ||
buffer = _gatherer.tensorBuffer2DInt(row_set, col_set, order.is_row_major) | ||
elif dtype == np.int_: | ||
buffer = _gatherer.tensorBuffer2DLong(row_set, col_set, order.is_row_major) | ||
elif dtype == np.single: | ||
buffer = _gatherer.tensorBuffer2DFloat(row_set, col_set, order.is_row_major) | ||
elif dtype == np.double: | ||
buffer = _gatherer.tensorBuffer2DDouble(row_set, col_set, order.is_row_major) | ||
else: | ||
raise ValueError(f"Data type {dtype} is not supported.") | ||
|
||
tensor = np.frombuffer(buffer, dtype = dtype) | ||
|
||
if order.is_row_major: | ||
tensor.shape = (len(col_set), row_set.intSize()) | ||
return tensor.T | ||
else: | ||
tensor.shape = (row_set.intSize(), len(col_set)) | ||
return tensor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
# | ||
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending | ||
# | ||
|
||
############################################################################## | ||
# NOTE: the jvm should have been initialized, or this test will certainly fail | ||
############################################################################## | ||
|
||
import pandas as pd | ||
import numpy as np | ||
import unittest | ||
import jpy | ||
import sys | ||
import os | ||
|
||
from deephaven import learn, tableToDataFrame, TableTools | ||
from deephaven.learn import gather | ||
|
||
class TestGather(unittest.TestCase): | ||
""" | ||
Test cases for deephaven.learn submodule | ||
""" | ||
|
||
@classmethod | ||
def setUpClass(cls): | ||
""" | ||
Inherited method allowing initialization of test environment | ||
""" | ||
# Tables | ||
cls.bool_table = TableTools.emptyTable(100).update( | ||
"X = true", | ||
"Y = false", | ||
"Z = (i % 2 == 0) ? true : false" | ||
) | ||
cls.byte_table = TableTools.emptyTable(100).update( | ||
"X = (byte)i", | ||
"Y = (byte)(100 - X)", | ||
"Z = (byte)(-101 + X)" | ||
) | ||
cls.short_table = TableTools.emptyTable(100).update( | ||
"X = (short)i", | ||
"Y = (short)(100 - X)", | ||
"Z = (short)(-101 + X)" | ||
) | ||
cls.int_table = TableTools.emptyTable(100).update( | ||
"X = (int)i", | ||
"Y = 100 - X", | ||
"Z = -101 + X" | ||
) | ||
cls.long_table = TableTools.emptyTable(100).update( | ||
"X = (long)i", | ||
"Y = 100 - X", | ||
"Z = -101 + X" | ||
) | ||
cls.float_table = TableTools.emptyTable(100).update( | ||
"X = (float)i", | ||
"Y = (float)sqrt(X)", | ||
"Z = (float)sqrt(Y)" | ||
) | ||
cls.double_table = TableTools.emptyTable(100).update( | ||
"X = (double)i", | ||
"Y = sqrt(X)", | ||
"Z = sqrt(Y)" | ||
) | ||
# NumPy arrays | ||
cls.bool_array = \ | ||
np.array([[True, False, True], [True, False, False]] * 50, | ||
dtype = np.bool_) | ||
cls.byte_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.byte), | ||
np.arange(100, 0, -1, dtype = np.byte), | ||
np.arange(-101, -1, dtype = np.byte) | ||
)).T | ||
cls.short_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.short), | ||
np.arange(100, 0, -1, dtype = np.short), | ||
np.arange(-101, -1, dtype = np.short) | ||
)).T | ||
cls.int_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.intc), | ||
np.arange(100, 0, -1, dtype = np.intc), | ||
np.arange(-101, -1, dtype = np.intc) | ||
)).T | ||
cls.long_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.int_), | ||
np.arange(100, 0, -1, dtype = np.int_), | ||
np.arange(-101, -1, dtype = np.int_) | ||
)).T | ||
cls.float_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.single), | ||
np.sqrt(np.arange(0, 100, dtype = np.single)), | ||
np.sqrt(np.sqrt(np.arange(0, 100, dtype = np.single))) | ||
)).T | ||
cls.double_array = np.vstack(( | ||
np.arange(0, 100, dtype = np.double), | ||
np.sqrt(np.arange(0, 100, dtype = np.double)), | ||
np.sqrt(np.sqrt(np.arange(0, 100, dtype = np.double))) | ||
)).T | ||
|
||
# Model for learn to use when dtype = [np.bool_] | ||
def boolean_model(self, features): | ||
return np.count_nonzero(features, axis = 1) < 2 | ||
|
||
# Model for learn to use when dtype = [np.byte, np.short, np.intc, np.int_] | ||
def integer_model(self, features): | ||
return np.sum(features, axis = 1) | ||
|
||
# Model for learn to use when dtype = [np.single, np.double] | ||
def decimal_model(self, features): | ||
return np.prod(features, axis = 1) | ||
|
||
# Test byte data types | ||
def test_byte(self): | ||
self.base_test(source = self.byte_table, model = self.integer_model, np_dtype = np.byte) | ||
|
||
# Test short data types | ||
def test_short(self): | ||
self.base_test(source = self.short_table, model = self.integer_model, np_dtype = np.short) | ||
|
||
# Test int data types | ||
def test_int(self): | ||
self.base_test(source = self.int_table, model = self.integer_model, np_dtype = np.intc) | ||
|
||
# Test long data types | ||
def test_long(self): | ||
self.base_test(source = self.long_table, model = self.integer_model, np_dtype = np.int_) | ||
|
||
# Test float data types | ||
def test_float(self): | ||
self.base_test(source = self.float_table, model = self.decimal_model, np_dtype = np.single) | ||
|
||
# Test double data types | ||
def test_double(self): | ||
self.base_test(source = self.double_table, model = self.decimal_model, np_dtype = np.double) | ||
|
||
# The base test, which other tests will be built from | ||
def base_test(self, source, model, np_dtype): | ||
|
||
rows = source.getRowSet() | ||
cols = [source.getColumnSource(col) for col in ["X", "Y", "Z"]] | ||
|
||
gatherer_rowmajor = lambda rowset, colset : gather.table_to_numpy_2d(rowset, colset, gather.MemoryLayout.ROW_MAJOR, np_dtype) | ||
gatherer_colmajor = lambda rowset, colset : gather.table_to_numpy_2d(rowset, colset, gather.MemoryLayout.COLUMN_MAJOR, np_dtype) | ||
|
||
array_from_table = tableToDataFrame(source).values | ||
|
||
gathered_rowmajor = gatherer_rowmajor(rows, cols) | ||
gathered_colmajor = gatherer_colmajor(rows, cols) | ||
|
||
with self.subTest(msg = "Array shape"): | ||
self.assertTrue(gathered_rowmajor.shape == array_from_table.shape) | ||
print("Row major gathered shape: {}".format(gathered_rowmajor.shape)) | ||
self.assertTrue(gathered_colmajor.shape == array_from_table.shape) | ||
print("Column major gathered shape: {}".format(gathered_colmajor.shape)) | ||
with self.subTest(msg = "Values in array"): | ||
self.assertTrue(np.allclose(gathered_rowmajor, array_from_table)) | ||
print("All row-major array values are equal") | ||
self.assertTrue(np.allclose(gathered_colmajor, array_from_table)) | ||
print("All column-major array values are equal") | ||
with self.subTest(msg = "Array data type"): | ||
self.assertTrue(gathered_rowmajor.dtype == np_dtype) | ||
self.assertTrue(gathered_rowmajor.dtype == array_from_table.dtype) | ||
self.assertTrue(gathered_colmajor.dtype == np_dtype) | ||
self.assertTrue(gathered_colmajor.dtype == array_from_table.dtype) | ||
self.assertTrue(gathered_rowmajor.dtype == gathered_colmajor.dtype) | ||
print("Array dtype: {}".format(np_dtype)) | ||
with self.subTest(msg = "Contiguity"): | ||
self.assertTrue(gathered_rowmajor.flags["C_CONTIGUOUS"] or gathered_rowmajor.flags["F_CONTIGUOUS"]) | ||
self.assertTrue(gathered_colmajor.flags["C_CONTIGUOUS"] or gathered_colmajor.flags["F_CONTIGUOUS"]) | ||
print("Array contiguity checked") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# | ||
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending | ||
# | ||
|
||
# Test python sort syntax | ||
|
||
import unittest | ||
from unittest import TestCase | ||
from deephaven import as_list, SortColumn, ColumnName | ||
from deephaven.TableTools import newTable, intCol, diff | ||
|
||
class SortTestCase(TestCase): | ||
def test_sort_syntax(self): | ||
source = newTable( | ||
intCol("A", 1, 2, 3, 1, 2, 3), | ||
intCol("B", 0, 1, 0, 1, 0, 1), | ||
intCol("C", 1, 2, 3, 4, 5, 6), | ||
) | ||
|
||
sort_columns = as_list([ | ||
SortColumn.asc(ColumnName.of("A")), | ||
SortColumn.desc(ColumnName.of("B")) | ||
]) | ||
|
||
actual = source.sort(sort_columns) | ||
|
||
target = newTable( | ||
intCol("A", 1, 1, 2, 2, 3, 3), | ||
intCol("B", 1, 0, 1, 0, 1, 0), | ||
intCol("C", 4, 1, 2, 5, 6, 3), | ||
) | ||
|
||
diff_string = diff(actual, target, 1) | ||
self.assertEqual("", diff_string) | ||
|
||
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |
Oops, something went wrong.