Skip to content

Commit

Permalink
add class, transform yob, taf encoding and separator
Browse files Browse the repository at this point in the history
  • Loading branch information
nimarion committed Mar 17, 2024
1 parent 2ef432d commit 43e96c6
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
4 changes: 4 additions & 0 deletions cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,8 @@ def performance_to_float(performance):

# Remove the 'Performance' column before the frame is written out.
outputDf = outputDf.drop(columns=['Performance'])

# Write birth years as whole numbers. Missing years are tagged with the
# sentinel -1 so the column can be cast to int, then turned into empty cells.
if 'yearOfBirth' in outputDf.columns:
    birth_years = outputDf['yearOfBirth'].fillna(-1).astype(float).astype(int)
    outputDf['yearOfBirth'] = birth_years.replace(-1, "")

outputDf.to_csv(file, index=False)
6 changes: 5 additions & 1 deletion filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@
# Blank out missing values so they are written as empty CSV fields.
filtered_input = filtered_input.fillna("")
filtered_input = filtered_input.replace("nan", "")

# Write birth years as whole numbers. Missing years were replaced by ""
# above and float("") raises ValueError, so blank cells are kept blank
# instead of being cast together with the rest of the column.
if 'yearOfBirth' in filtered_input.columns:
    filtered_input['yearOfBirth'] = filtered_input['yearOfBirth'].apply(
        lambda year: ""
        if isinstance(year, str) and not year.strip()
        else int(float(year))
    )

# Single write of the result as a semicolon-separated file.
# NOTE(review): Python's 'ansi' codec is an alias of 'mbcs' and is only
# available on Windows; elsewhere this call raises LookupError. Confirm the
# script is Windows-only or switch to an explicit code page.
filtered_input.to_csv(args.output, index=False, sep=';', encoding='ansi')
7 changes: 4 additions & 3 deletions taf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@

outputDf = pd.concat([outputDf, df])


# Derive a one-letter class code from 'sex': 'Male' -> 'M', 'Female' -> 'W',
# and anything else (including missing values) -> 'X'.
class_by_sex = {'Male': 'M', 'Female': 'W'}
outputDf['class'] = outputDf['sex'].apply(lambda sex: class_by_sex.get(sex, 'X'))

# Columns written to the export, in output order.
desired_order = [
    'code', 'type', 'discipline', 'class', 'result', 'wind',
    'venue', 'venueCountry', 'environment', 'date',
    'name', 'firstname', 'lastname', 'nation', 'yearOfBirth', 'sex',
]

# reindex keeps exactly these columns in this order (a listed column that is
# absent from the frame is added empty), so no separate drop step is needed.
outputDf = outputDf.reindex(columns=desired_order)

# NOTE(review): Python's 'ansi' codec is an alias of 'mbcs' and is only
# available on Windows; elsewhere this call raises LookupError. Confirm the
# script is Windows-only or switch to an explicit code page.
outputDf.to_csv(args.output, index=False, sep=';', encoding='ansi')

0 comments on commit 43e96c6

Please sign in to comment.