Skip to content

Commit

Permalink
Use h5py.string_dtype() for all string arrays
Browse files (browse the repository at this point in the history)
  • Loading branch information
stuart-cls committed Sep 27, 2024
1 parent 7db9727 commit c872d57
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,18 +596,19 @@ def parse(attr):
 f.attrs['Orange_version'] = ORANGE_VERSION
 f.attrs['HDF5_Version'] = h5py.version.hdf5_version
 f.attrs['h5py_version'] = h5py.version.version
+str_dtype = h5py.string_dtype()
 for subdomain in ['attributes', 'class_vars', 'metas']:
 parsed = [parse(feature) for feature in getattr(data.domain, subdomain)]
-domain = np.array([[name, header] for name, header, _ in parsed], 'S')
-domain_args = np.array([json.dumps(args) for *_, args in parsed], 'S')
+domain = np.array([[name, header] for name, header, _ in parsed], dtype=str_dtype)
+domain_args = np.array([json.dumps(args) for *_, args in parsed], dtype=str_dtype)
 f.create_dataset(f'domain/{subdomain}', data=domain)
 f.create_dataset(f'domain/{subdomain}_args', data=domain_args)
 f.create_dataset("X", data=data.X)
 if data.Y.size:
 f.create_dataset("Y", data=data.Y)
 if data.metas.size:
 for i, attr in enumerate(data.domain.metas):
-col_type = h5py.string_dtype() if isinstance(attr, StringVariable) else 'f'
+col_type = str_dtype if isinstance(attr, StringVariable) else 'f'
 col_data = data.metas[:, [i]].astype(col_type)
 if col_type != 'f':
 col_data[pd.isnull(col_data)] = ""
Expand Down

0 comments on commit c872d57

Please sign in to comment.