Skip to content

Commit

Permalink
Release/1.4.3 (#380)
Browse files Browse the repository at this point in the history
* fix: performance improvements (#374)
  • Loading branch information
ralphrass authored Oct 8, 2024
1 parent 3214783 commit f9c5fe4
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 11 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ Preferably use **Added**, **Changed**, **Removed** and **Fixed** topics in each

## [Unreleased]

## [1.4.3](https://github.com/quintoandar/butterfree/releases/tag/1.4.3)
* Performance upgrade ([#378](https://github.com/quintoandar/butterfree/pull/378))

## [1.4.2](https://github.com/quintoandar/butterfree/releases/tag/1.4.2)
* Minor fixes ([#374](https://github.com/quintoandar/butterfree/pull/376))
* Minor fixes ([#376](https://github.com/quintoandar/butterfree/pull/376))

## [1.4.1](https://github.com/quintoandar/butterfree/releases/tag/1.4.1)
* Performance Improvements ([#374](https://github.com/quintoandar/butterfree/pull/374))
Expand Down
21 changes: 19 additions & 2 deletions butterfree/migrations/database_migration/cassandra_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ def _get_alter_table_add_query(self, columns: List[Diff], table_name: str) -> st
def _get_alter_column_type_query(self, column: Diff, table_name: str) -> str:
"""Creates CQL statement to alter columns' types.
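In Cassandra 3.4.x to 3.11.x, altering a column's type is not allowed.
As a workaround, this method adds a temporary column with the new type,
copies the data into it, drops the original column, and renames the
temporary column to the original name.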
Args:
column: Diff object with ALTER_TYPE kind.
table_name: table name.
Expand All @@ -86,10 +89,24 @@ def _get_alter_column_type_query(self, column: Diff, table_name: str) -> str:
Alter column type query.
"""
parsed_columns = self._get_parsed_columns([column])

temp_column_name = f"{column.column}_temp"

add_temp_column_query = (
f"ALTER TABLE {table_name} ADD {temp_column_name} {column.value};"
)
copy_data_to_temp_query = (
f"UPDATE {table_name} SET {temp_column_name} = {column.column};"
)

drop_old_column_query = f"ALTER TABLE {table_name} DROP {column.column};"
rename_temp_column_query = (
f"ALTER TABLE {table_name} RENAME {temp_column_name} TO {column.column};"
)

return (
f"ALTER TABLE {table_name} ALTER {parsed_columns.replace(' ', ' TYPE ')};"
f"{add_temp_column_query} {copy_data_to_temp_query} "
f"{drop_old_column_query} {rename_temp_column_query};"
)

@staticmethod
Expand Down
21 changes: 14 additions & 7 deletions butterfree/transform/aggregated_feature_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,14 +576,16 @@ def construct(

pre_hook_df = self.run_pre_hooks(dataframe)

output_df = reduce(
lambda df, feature: feature.transform(df),
self.keys + [self.timestamp],
pre_hook_df,
output_df = pre_hook_df
for feature in self.keys + [self.timestamp]:
output_df = feature.transform(output_df)

output_df = self.incremental_strategy.filter_with_incremental_strategy(
dataframe=output_df, start_date=start_date, end_date=end_date
)

if self._windows and end_date is not None:
# run aggregations for each window
# Run aggregations for each window
agg_list = [
self._aggregate(
dataframe=output_df,
Expand All @@ -609,7 +611,7 @@ def construct(
client=client, dataframe=output_df, end_date=end_date
)

# left join each aggregation result to our base dataframe
# Left join each aggregation result to our base dataframe
output_df = reduce(
lambda left, right: self._dataframe_join(
left,
Expand Down Expand Up @@ -648,7 +650,12 @@ def construct(

post_hook_df = self.run_post_hooks(output_df)

# Eager evaluation, only if needed and manageable
if not output_df.isStreaming and self.eager_evaluation:
post_hook_df.cache().count()
# Small dataframes only
if output_df.count() < 1_000_000:
post_hook_df.cache().count()
else:
post_hook_df.cache() # Cache without materialization for large volumes

return post_hook_df
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import find_packages, setup

__package_name__ = "butterfree"
__version__ = "1.4.2"
__version__ = "1.4.3"
__repository_url__ = "https://github.com/quintoandar/butterfree"

with open("requirements.txt") as f:
Expand Down

0 comments on commit f9c5fe4

Please sign in to comment.