Merge remote-tracking branch 'upstream/main' into release/v0.7.0
devinrsmith committed Dec 9, 2021
2 parents 1de945f + ae32bfd commit 2ce3ebe
Showing 7 changed files with 1,182 additions and 10 deletions.
18 changes: 9 additions & 9 deletions Integrations/python/deephaven/__init__.py
@@ -97,7 +97,6 @@

 from .csv import read as read_csv
 from .csv import write as write_csv
-from .conversion_utils import convertToJavaList as as_list

 # NB: this must be defined BEFORE importing .jvm_init or .start_jvm (circular import)
 def initialize():
@@ -414,15 +413,16 @@ def doLocked(f, lock_type="shared"):
         raise ValueError("Unsupported lock type: lock_type={}".format(lock_type))


-def combo_agg(agg_list):
+def as_list(values):
     """
-    Combines aggregations.
+    Creates a Java list containing the values.
-    :param agg_list: list of aggregations
-    :return: combined aggregations
+    :param values: values
+    :return: Java list containing the values.
     """
     _JArrayList = jpy.get_type("java.util.ArrayList")
-    j_agg_list = _JArrayList(len(agg_list))
-    for agg in agg_list:
-        j_agg_list.add(agg)
-    return j_agg_list
+    j_list = _JArrayList(len(values))
+    for value in values:
+        j_list.add(value)
+    return j_list
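
For reference, a minimal usage sketch of the renamed helper (not part of the commit; it assumes the JVM has already been initialized, e.g. inside a running Deephaven console):

# Minimal sketch, assuming an initialized JVM.
from deephaven import as_list

j_list = as_list([1, 2, 3])   # builds a java.util.ArrayList from the Python values
print(j_list.size())          # -> 3
print(j_list.get(0))          # -> 1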

6 changes: 5 additions & 1 deletion Integrations/python/deephaven/learn/__init__.py
@@ -30,13 +30,13 @@ def _defineSymbols():
         raise SystemError("No java functionality can be used until the JVM has been initialized through the jpy module")

     global _Input_, _Output_, _Computer_, _Scatterer_

     if _Input_ is None:
         _Input_ = jpy.get_type("io.deephaven.integrations.learn.Input")
         _Output_ = jpy.get_type("io.deephaven.integrations.learn.Output")
         _Computer_ = jpy.get_type("io.deephaven.integrations.learn.Computer")
         _Scatterer_ = jpy.get_type("io.deephaven.integrations.learn.Scatterer")


 # every module method that invokes Java classes should be decorated with @_passThrough
 @wrapt.decorator
 def _passThrough(wrapped, instance, args, kwargs):
@@ -55,6 +55,10 @@ def _passThrough(wrapped, instance, args, kwargs):
     _defineSymbols()
     return wrapped(*args, **kwargs)

+try:
+    _defineSymbols()
+except Exception as e:
+    pass

 @_passThrough
 class Input:
… (rest of file unchanged)
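
The hunks above follow the module's lazy-initialization pattern: Java types are looked up through jpy only once a JVM exists, and the newly added module-level try/except attempts that lookup eagerly at import time while silently deferring it when no JVM is running yet. A condensed, self-contained sketch of the same pattern (illustrative only, not Deephaven code; the Java type is arbitrary):

# Condensed illustration of the lazy jpy-symbol pattern (not part of the commit).
import jpy
import wrapt

_SomeType_ = None   # placeholder name; resolved lazily

def _define_symbols():
    if not jpy.has_jvm():
        raise SystemError("JVM not initialized")
    global _SomeType_
    if _SomeType_ is None:
        _SomeType_ = jpy.get_type("java.util.ArrayList")   # any Java type works for the sketch

@wrapt.decorator
def _pass_through(wrapped, instance, args, kwargs):
    _define_symbols()                  # make sure Java symbols exist before the call
    return wrapped(*args, **kwargs)

try:
    _define_symbols()                  # eager attempt at import time
except Exception:
    pass                               # defer until a decorated function is first called

@_pass_through
def make_list():
    return _SomeType_()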
122 changes: 122 additions & 0 deletions Integrations/python/deephaven/learn/gather/__init__.py
@@ -0,0 +1,122 @@
#
# Copyright (c) 2016 - 2021 Deephaven Data Labs and Patent Pending
#
"""
Utilities for gathering Deephaven table data into Python objects
"""

import numpy as np
import enum
import jpy
import wrapt

# None until the first _defineSymbols() call
_gatherer = None

def _defineSymbols():
    if not jpy.has_jvm():
        raise SystemError("No java functionality can be used until the JVM has been initialized through the jpy module")

    global _gatherer
    global Layout

    if _gatherer is None:
        _gatherer = jpy.get_type("io.deephaven.integrations.learn.gather.NumPy")

class MemoryLayout(enum.Enum):
    """
    Memory layouts for an array.
    """
    ROW_MAJOR = True
    """Row-major memory layout."""
    COLUMN_MAJOR = False
    """Column-major memory layout."""
    C = True
    """Memory layout consistent with C arrays (row-major)."""
    FORTRAN = False
    """Memory layout consistent with Fortran arrays (column-major)."""

    def __init__(self, is_row_major):
        self.is_row_major = is_row_major


# Every method that depends on symbols defined via _defineSymbols() should be decorated with @_passThrough
@wrapt.decorator
def _passThrough(wrapped, instance, args, kwargs):
    """
    For decoration of module methods, to define necessary symbols at runtime
    :param wrapped: the method to be decorated
    :param instance: the object to which the wrapped function was bound when it was called
    :param args: the argument list for `wrapped`
    :param kwargs: the keyword argument dictionary for `wrapped`
    :return: the decorated version of the method
    """

    _defineSymbols()
    return wrapped(*args, **kwargs)

try:
    _defineSymbols()
except Exception as e:
    pass

@_passThrough
def convert_to_numpy_dtype(dtype):
"""
Convert an input type to the corresponding NumPy data type
:param dtype: A Python type
"""
if dtype.__module__ == np.__name__:
return dtype
elif dtype == bool:
dtype = np.bool_
elif dtype == float:
dtype = np.double
elif dtype == int:
dtype = np.intc
else:
raise ValueError(f"{dtype} is not a data type that can be converted to a NumPy dtype.")
return dtype

@_passThrough
def table_to_numpy_2d(row_set, col_set, order: MemoryLayout = MemoryLayout.ROW_MAJOR, dtype: np.dtype = np.intc):
    """
    Convert Deephaven table data to a 2d NumPy array of the appropriate size
    :param row_set: A RowSequence describing the number of rows in the table
    :param col_set: ColumnSources describing which columns to copy
    :param order: The desired memory layout of the output array
    :param dtype: The desired NumPy data type of the output NumPy array
    :return: A NumPy ndarray
    """

    if not(isinstance(order, MemoryLayout)):
        raise ValueError(f"Invalid major order {order}. Please use an enum value from MemoryLayout.")

    dtype = convert_to_numpy_dtype(dtype)

    if dtype == np.byte:
        buffer = _gatherer.tensorBuffer2DByte(row_set, col_set, order.is_row_major)
    elif dtype == np.short:
        buffer = _gatherer.tensorBuffer2DShort(row_set, col_set, order.is_row_major)
    elif dtype == np.intc:
        buffer = _gatherer.tensorBuffer2DInt(row_set, col_set, order.is_row_major)
    elif dtype == np.int_:
        buffer = _gatherer.tensorBuffer2DLong(row_set, col_set, order.is_row_major)
    elif dtype == np.single:
        buffer = _gatherer.tensorBuffer2DFloat(row_set, col_set, order.is_row_major)
    elif dtype == np.double:
        buffer = _gatherer.tensorBuffer2DDouble(row_set, col_set, order.is_row_major)
    else:
        raise ValueError(f"Data type {dtype} is not supported.")

    tensor = np.frombuffer(buffer, dtype = dtype)

    if order.is_row_major:
        tensor.shape = (len(col_set), row_set.intSize())
        return tensor.T
    else:
        tensor.shape = (row_set.intSize(), len(col_set))
        return tensor
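
For reference, a hedged usage sketch of the new gather module (not part of the commit; the table contents and column names are illustrative, and a running Deephaven session with an initialized JVM is assumed):

# Illustrative sketch, assuming an initialized JVM / running Deephaven session.
import numpy as np
from deephaven import TableTools
from deephaven.learn import gather

t = TableTools.emptyTable(10).update("X = i", "Y = 2 * i")
rows = t.getRowSet()
cols = [t.getColumnSource(name) for name in ["X", "Y"]]

# Gather the table into a row-major 2-D NumPy array of 32-bit integers.
data = gather.table_to_numpy_2d(rows, cols, gather.MemoryLayout.ROW_MAJOR, np.intc)
print(data.shape)   # -> (10, 2): one row per table row, one column per column source

Note that MemoryLayout.C and MemoryLayout.FORTRAN are aliases of ROW_MAJOR and COLUMN_MAJOR (enum members that share a value become aliases), and that in the row-major branch the gathered buffer is reshaped to (columns, rows) and returned transposed, so the result always has one row per table row.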
170 changes: 170 additions & 0 deletions Integrations/python/test/test_learn_gather.py
@@ -0,0 +1,170 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#

##############################################################################
# NOTE: the jvm should have been initialized, or this test will certainly fail
##############################################################################

import pandas as pd
import numpy as np
import unittest
import jpy
import sys
import os

from deephaven import learn, tableToDataFrame, TableTools
from deephaven.learn import gather

class TestGather(unittest.TestCase):
"""
Test cases for deephaven.learn submodule
"""

@classmethod
def setUpClass(cls):
"""
Inherited method allowing initialization of test environment
"""
# Tables
cls.bool_table = TableTools.emptyTable(100).update(
"X = true",
"Y = false",
"Z = (i % 2 == 0) ? true : false"
)
cls.byte_table = TableTools.emptyTable(100).update(
"X = (byte)i",
"Y = (byte)(100 - X)",
"Z = (byte)(-101 + X)"
)
cls.short_table = TableTools.emptyTable(100).update(
"X = (short)i",
"Y = (short)(100 - X)",
"Z = (short)(-101 + X)"
)
cls.int_table = TableTools.emptyTable(100).update(
"X = (int)i",
"Y = 100 - X",
"Z = -101 + X"
)
cls.long_table = TableTools.emptyTable(100).update(
"X = (long)i",
"Y = 100 - X",
"Z = -101 + X"
)
cls.float_table = TableTools.emptyTable(100).update(
"X = (float)i",
"Y = (float)sqrt(X)",
"Z = (float)sqrt(Y)"
)
cls.double_table = TableTools.emptyTable(100).update(
"X = (double)i",
"Y = sqrt(X)",
"Z = sqrt(Y)"
)
# NumPy arrays
cls.bool_array = \
np.array([[True, False, True], [True, False, False]] * 50,
dtype = np.bool_)
cls.byte_array = np.vstack((
np.arange(0, 100, dtype = np.byte),
np.arange(100, 0, -1, dtype = np.byte),
np.arange(-101, -1, dtype = np.byte)
)).T
cls.short_array = np.vstack((
np.arange(0, 100, dtype = np.short),
np.arange(100, 0, -1, dtype = np.short),
np.arange(-101, -1, dtype = np.short)
)).T
cls.int_array = np.vstack((
np.arange(0, 100, dtype = np.intc),
np.arange(100, 0, -1, dtype = np.intc),
np.arange(-101, -1, dtype = np.intc)
)).T
cls.long_array = np.vstack((
np.arange(0, 100, dtype = np.int_),
np.arange(100, 0, -1, dtype = np.int_),
np.arange(-101, -1, dtype = np.int_)
)).T
cls.float_array = np.vstack((
np.arange(0, 100, dtype = np.single),
np.sqrt(np.arange(0, 100, dtype = np.single)),
np.sqrt(np.sqrt(np.arange(0, 100, dtype = np.single)))
)).T
cls.double_array = np.vstack((
np.arange(0, 100, dtype = np.double),
np.sqrt(np.arange(0, 100, dtype = np.double)),
np.sqrt(np.sqrt(np.arange(0, 100, dtype = np.double)))
)).T

# Model for learn to use when dtype = [np.bool_]
def boolean_model(self, features):
return np.count_nonzero(features, axis = 1) < 2

# Model for learn to use when dtype = [np.byte, np.short, np.intc, np.int_]
def integer_model(self, features):
return np.sum(features, axis = 1)

# Model for learn to use when dtype = [np.single, np.double]
def decimal_model(self, features):
return np.prod(features, axis = 1)

# Test byte data types
def test_byte(self):
self.base_test(source = self.byte_table, model = self.integer_model, np_dtype = np.byte)

# Test short data types
def test_short(self):
self.base_test(source = self.short_table, model = self.integer_model, np_dtype = np.short)

# Test int data types
def test_int(self):
self.base_test(source = self.int_table, model = self.integer_model, np_dtype = np.intc)

# Test long data types
def test_long(self):
self.base_test(source = self.long_table, model = self.integer_model, np_dtype = np.int_)

# Test float data types
def test_float(self):
self.base_test(source = self.float_table, model = self.decimal_model, np_dtype = np.single)

# Test double data types
def test_double(self):
self.base_test(source = self.double_table, model = self.decimal_model, np_dtype = np.double)

# The base test, which other tests will be built from
def base_test(self, source, model, np_dtype):

rows = source.getRowSet()
cols = [source.getColumnSource(col) for col in ["X", "Y", "Z"]]

gatherer_rowmajor = lambda rowset, colset : gather.table_to_numpy_2d(rowset, colset, gather.MemoryLayout.ROW_MAJOR, np_dtype)
gatherer_colmajor = lambda rowset, colset : gather.table_to_numpy_2d(rowset, colset, gather.MemoryLayout.COLUMN_MAJOR, np_dtype)

array_from_table = tableToDataFrame(source).values

gathered_rowmajor = gatherer_rowmajor(rows, cols)
gathered_colmajor = gatherer_colmajor(rows, cols)

with self.subTest(msg = "Array shape"):
self.assertTrue(gathered_rowmajor.shape == array_from_table.shape)
print("Row major gathered shape: {}".format(gathered_rowmajor.shape))
self.assertTrue(gathered_colmajor.shape == array_from_table.shape)
print("Column major gathered shape: {}".format(gathered_colmajor.shape))
with self.subTest(msg = "Values in array"):
self.assertTrue(np.allclose(gathered_rowmajor, array_from_table))
print("All row-major array values are equal")
self.assertTrue(np.allclose(gathered_colmajor, array_from_table))
print("All column-major array values are equal")
with self.subTest(msg = "Array data type"):
self.assertTrue(gathered_rowmajor.dtype == np_dtype)
self.assertTrue(gathered_rowmajor.dtype == array_from_table.dtype)
self.assertTrue(gathered_colmajor.dtype == np_dtype)
self.assertTrue(gathered_colmajor.dtype == array_from_table.dtype)
self.assertTrue(gathered_rowmajor.dtype == gathered_colmajor.dtype)
print("Array dtype: {}".format(np_dtype))
with self.subTest(msg = "Contiguity"):
self.assertTrue(gathered_rowmajor.flags["C_CONTIGUOUS"] or gathered_rowmajor.flags["F_CONTIGUOUS"])
self.assertTrue(gathered_colmajor.flags["C_CONTIGUOUS"] or gathered_colmajor.flags["F_CONTIGUOUS"])
print("Array contiguity checked")
39 changes: 39 additions & 0 deletions Integrations/python/test/test_sort.py
@@ -0,0 +1,39 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#

# Test python sort syntax

import unittest
from unittest import TestCase
from deephaven import as_list, SortColumn, ColumnName
from deephaven.TableTools import newTable, intCol, diff

class SortTestCase(TestCase):
    def test_sort_syntax(self):
        source = newTable(
            intCol("A", 1, 2, 3, 1, 2, 3),
            intCol("B", 0, 1, 0, 1, 0, 1),
            intCol("C", 1, 2, 3, 4, 5, 6),
        )

        sort_columns = as_list([
            SortColumn.asc(ColumnName.of("A")),
            SortColumn.desc(ColumnName.of("B"))
        ])

        actual = source.sort(sort_columns)

        target = newTable(
            intCol("A", 1, 1, 2, 2, 3, 3),
            intCol("B", 1, 0, 1, 0, 1, 0),
            intCol("C", 4, 1, 2, 5, 6, 3),
        )

        diff_string = diff(actual, target, 1)
        self.assertEqual("", diff_string)



if __name__ == '__main__':
    unittest.main()
… (remaining changed files not shown)
