Fix some warnings in the titanic sample (#24)

datamindedbe · Nov 6, 2023 · ca2c8ed
1 parent 8d6e79e
commit ca2c8ed
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 24 deletions.
diff --git a/mlops/titanic/notebooks/exploration.ipynb b/mlops/titanic/notebooks/exploration.ipynb
diff --git a/mlops/titanic/notebooks/model.ipynb b/mlops/titanic/notebooks/model.ipynb
@@ -296,7 +296,7 @@
    ],
    "source": [
     "boto3.setup_default_session()\n",
-    "ssm = boto3.client('ssm')\n",
+    "ssm = boto3.client('ssm', region_name='eu-west-1')\n",
     "parameter = ssm.get_parameter(Name='/conveyor-samples/bucket/name')\n",
     "bucket = parameter['Parameter']['Value']\n",
     "\n",

diff --git a/mlops/titanic/src/titanic/config.py b/mlops/titanic/src/titanic/config.py
@@ -1,7 +1,7 @@
 import argparse
 
 class Config:
-    def __init__(self, asset: str, date: str):
+    def __init__(self, *, asset: str, date: str):
         self.date = date
         self.asset = asset
 
@@ -15,4 +15,4 @@ def parse_args() -> Config:
         "-a", "--asset", dest="asset", help="Asset you want to ingest or load", required=False
     )
     args = parser.parse_args()
-    return Config(args.asset, args.date)
+    return Config(asset=args.asset, date=args.date)
diff --git a/mlops/titanic/src/titanic/jobs/prepare.py b/mlops/titanic/src/titanic/jobs/prepare.py
@@ -45,7 +45,7 @@ def _is_nan(x):
 
 def add_categorical_fare_feature(df: pd.DataFrame):
     df['Fare'] = df['Fare']. \
-        groupby([df['SexNumerical'], df['Pclass']]). \
+        groupby([df['SexNumerical'], df['Pclass']], group_keys=False). \
         apply(lambda x: x.fillna(x.median()))
     df['CategoricalFare'] = pd.qcut(df['Fare'], 4, labels = [0, 1, 2, 3]).astype(int)
 
@@ -58,7 +58,7 @@ def add_gender_feature(df: pd.DataFrame):
 
 def add_age_feature(df: pd.DataFrame):
     df['Age'] = df['Age']. \
-        groupby([df['SexNumerical'], df['Pclass']]). \
+        groupby([df['SexNumerical'], df['Pclass']], group_keys=False). \
         apply(lambda x: x.fillna(x.median()))