-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added bio.ipynb script and updated exec team to FA24
- Loading branch information
Edward Lee
committed
Sep 11, 2024
1 parent
2892a94
commit 3f4f789
Showing
2 changed files
with
288 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,283 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 55, | ||
"id": "00904cf7-2a43-4c2d-824a-86616c643196", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#!/usr/bin/env python3\n", | ||
"\n", | ||
"\"\"\"\n", | ||
"Generates JSON blobs of bios.\n", | ||
"\n", | ||
"Three files are written: the mentor bios (DEST_PATH), the exec bios\n", | ||
"(../src/data/bios/exec.json) and the exec team for the current semester\n", | ||
"(../src/data/team/<CURR_SEMESTER>.json).\n", | ||
"\n", | ||
"The bios CSV should contain at least the following columns (? means optional):\n", | ||
" email | name | role | course | preferred name? | pronouns | photo url | bio | website url?\n", | ||
"The photo url column of the bios CSV is currently not read; photos come from the\n", | ||
"image CSV described below.\n", | ||
"The single roster CSV should contain the following columns in order, with no header row:\n", | ||
" name | email | role | course\n", | ||
"The exec CSV should contain the following columns in order, again with no header row:\n", | ||
" name | email | role\n", | ||
"The image CSV should contain four columns in order; every row is treated as data, so\n", | ||
"there must be no header row. Only the second and fourth columns are read:\n", | ||
" (ignored) | photo url | (ignored) | email\n", | ||
"\n", | ||
"It is recommended to first do some preprocessing within Google Sheets to obtain\n", | ||
"the desired columns or filter any unwanted entries.\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"import csv\n", | ||
"import json\n", | ||
"\n", | ||
"# Semester tag; used as the file name of the exec team JSON (../src/data/team/<tag>.json).\n", | ||
"CURR_SEMESTER = \"fa24\" # CHANGE ME\n", | ||
"\n", | ||
"# All paths below are relative, so they resolve against the kernel's working directory.\n", | ||
"# Bios CSV (has a header row; see Cols) and the roster CSV, both passed to parse_bios.\n", | ||
"BIOS_PATH = \"../csvs/bios.csv\"\n", | ||
"ROSTER_PATH = \"../csvs/roster.csv\"\n", | ||
"# Output file for the mentor bios JSON.\n", | ||
"DEST_PATH = \"../src/data/bios/mentors.json\"\n", | ||
"# Exec roster CSV and photo CSV; parse_bios reads these two constants directly.\n", | ||
"EXEC_ROLE_PATH = \"../csvs/exec_roles.csv\"\n", | ||
"IMG_PATH = \"../csvs/img.csv\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 56, | ||
"id": "37dc22ea-a1c9-4f51-9259-a6643da8434d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"class Cols:\n", | ||
" \"\"\"\n", | ||
" The headers of each column as they appear in the CSV.\n", | ||
"\n", | ||
" parse_bios uses these strings as csv.DictReader keys when reading the bios CSV,\n", | ||
" so each one has to match the header cell character for character.\n", | ||
" \"\"\"\n", | ||
" # Normalized (periods removed, lowercased, stripped) to key each person.\n", | ||
" EMAIL = \"Berkeley email\"\n", | ||
" # Used as the display name unless a non-blank preferred name is given.\n", | ||
" NAME = \"Name\"\n", | ||
" ROLE = \"For which position are you accepting/rejecting?\"\n", | ||
" # NOTE: the trailing space is part of the key and must also be in the CSV header.\n", | ||
" COURSE = \"Which course are you accepting for? \"\n", | ||
" PRONOUNS = \"Pronouns\"\n", | ||
" PREF_NAME = \"Preferred Name\"\n", | ||
" # Not read at the moment: the lookup in parse_bios is commented out.\n", | ||
" IMG_URL = \"Photo\"\n", | ||
" BIO = \"Biography (300 character limit)\"\n", | ||
" WEB_URL = \"(Optional) Website\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 57, | ||
"id": "999bd421-c99f-458f-bcb0-cdc12c00af80", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Course answers that mean the respondent is rejecting; parse_bios ignores any bios row\n", | ||
"# whose course, once lowercased and with spaces removed, equals one of these strings.\n", | ||
"NORMALIZED_REJECTIONS = {\n", | ||
" \"iamrejectingallpositionsthatididnotexplicitlyaccept\",\n", | ||
" \"iamrejectingallampositionsthatididnotexplicitlyaccept\",\n", | ||
" \"iamrejectingallcmpositionsthatididnotexplicitlyaccept\"\n", | ||
"}\n", | ||
"\n", | ||
"# Module-level dicts that parse_bios fills in as a side effect. Both are keyed by the\n", | ||
"# normalized email (periods removed, lowercased, stripped); only their values are dumped.\n", | ||
"exec_bios = {} # Written into src/data/bios/exec.json\n", | ||
"exec_roles = {} # Written into src/data/team/[SEMESTER].json" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 58, | ||
"id": "8884c799-20a8-4cb6-8d3c-643ade3a7c0d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def _normalize_email(email):\n", | ||
"    \"\"\"\n", | ||
"    Returns the key used to match one person across CSVs: the email with\n", | ||
"    periods removed, lowercased, and surrounding whitespace stripped.\n", | ||
"    \"\"\"\n", | ||
"    return email.replace(\".\", \"\").lower().strip()\n", | ||
"\n", | ||
"\n", | ||
"def _has_text(value):\n", | ||
"    \"\"\"\n", | ||
"    Returns True if value is a non-empty string that is not all whitespace.\n", | ||
"    \"\"\"\n", | ||
"    return bool(value) and not value.isspace()\n", | ||
"\n", | ||
"\n", | ||
"def _apply_bio(person, name, use_pref_name, pronouns, course, role, bio, web_url):\n", | ||
"    \"\"\"\n", | ||
"    Merges one bios CSV row into an existing person entry, in place.\n", | ||
"    Blank fields never overwrite existing data; the latest non-blank value wins.\n", | ||
"    \"\"\"\n", | ||
"    # The roster name is only replaced when a preferred name was given\n", | ||
"    if use_pref_name:\n", | ||
"        person[\"name\"] = name\n", | ||
"    if _has_text(pronouns):\n", | ||
"        person[\"pronouns\"] = pronouns\n", | ||
"    if _has_text(course):\n", | ||
"        person.setdefault(\"courses\", {})[course] = role\n", | ||
"    if _has_text(bio):\n", | ||
"        person[\"details\"] = bio\n", | ||
"    if _has_text(web_url):\n", | ||
"        person[\"webUrl\"] = web_url\n", | ||
"\n", | ||
"\n", | ||
"def parse_bios(csv_path, master_roster_path):\n", | ||
"    \"\"\"\n", | ||
"    Reads bios from the given CSV, returning a dictionary of data keyed by emails.\n", | ||
"\n", | ||
"    csv_path is the bios CSV (with a header row; see Cols) and master_roster_path\n", | ||
"    is the roster CSV (name | email | role | course, no header row). The exec\n", | ||
"    roster and the photo CSV are read from EXEC_ROLE_PATH and IMG_PATH.\n", | ||
"\n", | ||
"    Side effect: the module-level exec_roles and exec_bios dicts are emptied and\n", | ||
"    refilled, so running this more than once in the same kernel cannot leave\n", | ||
"    entries behind for people who were removed from the exec CSV.\n", | ||
"\n", | ||
"    The returned dictionary is keyed by normalized email (see _normalize_email);\n", | ||
"    each value may hold \"name\", \"courses\" (course -> role), \"pronouns\",\n", | ||
"    \"details\", \"imgUrl\" and \"webUrl\".\n", | ||
"\n", | ||
"    Fully blank CSV lines are skipped. A non-blank row with the wrong number of\n", | ||
"    columns still raises ValueError.\n", | ||
"    \"\"\"\n", | ||
"    # Key on email without periods so we can find duplicates easily\n", | ||
"    people_by_email = {}\n", | ||
"\n", | ||
"    # Start from a clean slate so a re-run does not keep stale exec entries\n", | ||
"    exec_roles.clear()\n", | ||
"    exec_bios.clear()\n", | ||
"\n", | ||
"    # newline=\"\" lets the csv module handle line breaks inside quoted fields\n", | ||
"    with open(EXEC_ROLE_PATH, newline=\"\", encoding=\"utf-8\") as f:\n", | ||
"        for row in csv.reader(f):\n", | ||
"            if not row:\n", | ||
"                continue  # blank line\n", | ||
"            name, email, role = row\n", | ||
"            email_no_dot = _normalize_email(email)\n", | ||
"            # We'll assume nobody is in multiple exec roles\n", | ||
"            exec_roles[email_no_dot] = {\n", | ||
"                \"name\": name,\n", | ||
"                \"imgUrl\": \"\",\n", | ||
"                \"position\": role\n", | ||
"            }\n", | ||
"            exec_bios[email_no_dot] = {\n", | ||
"                \"name\": name,\n", | ||
"                \"role\": role,\n", | ||
"                \"imgUrl\": \"\"\n", | ||
"            }\n", | ||
"\n", | ||
"    with open(master_roster_path, newline=\"\", encoding=\"utf-8\") as f:\n", | ||
"        for row in csv.reader(f):\n", | ||
"            if not row:\n", | ||
"                continue  # blank line\n", | ||
"            name, email, role, preproc_course = row\n", | ||
"            course = preproc_course.lower().replace(\" \", \"\")\n", | ||
"            email_no_dot = _normalize_email(email)\n", | ||
"            if not role:\n", | ||
"                print(f\"=== WARNING: EMPTY ROLE IN MASTER ROSTER FOR {email.strip()} ===\")\n", | ||
"            if not course:\n", | ||
"                if email_no_dot not in exec_bios:\n", | ||
"                    print(f\"=== WARNING: EMPTY COURSE IN MASTER ROSTER FOR {email.strip()} AS {role} ===\")\n", | ||
"                continue  # skip exec because they're already in exec roster\n", | ||
"            if role.lower() == \"coordinator\":\n", | ||
"                continue  # also skip coords because they're already in the exec roster\n", | ||
"            if email_no_dot not in people_by_email:\n", | ||
"                people_by_email[email_no_dot] = {\n", | ||
"                    \"name\": name,\n", | ||
"                    \"courses\": {course: role},\n", | ||
"                }\n", | ||
"            else:\n", | ||
"                person = people_by_email[email_no_dot]\n", | ||
"                person[\"name\"] = name\n", | ||
"                person[\"courses\"][course] = role\n", | ||
"\n", | ||
"    with open(csv_path, newline=\"\", encoding=\"utf-8\") as f:\n", | ||
"        for row in csv.DictReader(f):\n", | ||
"            email = row[Cols.EMAIL]\n", | ||
"            email_no_dot = _normalize_email(email)\n", | ||
"            pref_name = row[Cols.PREF_NAME]\n", | ||
"            use_pref_name = _has_text(pref_name)\n", | ||
"            name = pref_name if use_pref_name else row[Cols.NAME]\n", | ||
"            bio = row[Cols.BIO]\n", | ||
"            course = row[Cols.COURSE].lower().replace(\" \", \"\").strip()\n", | ||
"            role = row[Cols.ROLE]\n", | ||
"            pronouns = row[Cols.PRONOUNS]\n", | ||
"            web_url = row[Cols.WEB_URL]\n", | ||
"\n", | ||
"            if course in NORMALIZED_REJECTIONS:\n", | ||
"                continue  # this row is a rejection, nothing to record\n", | ||
"\n", | ||
"            person = people_by_email.get(email_no_dot)\n", | ||
"            if role == \"Exec\" or email_no_dot in exec_bios:\n", | ||
"                # A form answer of \"Exec\" does not guarantee the person is in the\n", | ||
"                # exec CSV, so check before indexing instead of raising KeyError.\n", | ||
"                if email_no_dot in exec_roles:\n", | ||
"                    exec_roles[email_no_dot][\"imgUrl\"] = \"\"  # filled in from the photo CSV below\n", | ||
"                    exec_roles[email_no_dot][\"pronouns\"] = pronouns\n", | ||
"                if email_no_dot in exec_bios:\n", | ||
"                    exec_bios[email_no_dot][\"imgUrl\"] = \"\"  # filled in from the photo CSV below\n", | ||
"                    exec_bios[email_no_dot][\"pronouns\"] = pronouns\n", | ||
"                    exec_bios[email_no_dot][\"details\"] = bio\n", | ||
"                    exec_bios[email_no_dot][\"webUrl\"] = web_url\n", | ||
"                else:\n", | ||
"                    print(f\"=== WARNING: EXEC BIO FOR {name} BUT {email.strip()} IS NOT IN THE EXEC ROSTER ===\")\n", | ||
"                # Exec members who also hold a course role keep a mentor entry too\n", | ||
"                if person is not None:\n", | ||
"                    _apply_bio(person, name, use_pref_name, pronouns, course, role, bio, web_url)\n", | ||
"            elif person is None:\n", | ||
"                # Not on the roster: build the entry from the bio alone\n", | ||
"                person = {\n", | ||
"                    \"name\": name,\n", | ||
"                    \"pronouns\": pronouns,\n", | ||
"                    \"details\": bio,\n", | ||
"                    \"imgUrl\": \"\",  # filled in from the photo CSV below\n", | ||
"                    \"webUrl\": web_url,\n", | ||
"                }\n", | ||
"                if _has_text(course):\n", | ||
"                    person[\"courses\"] = {course: role}\n", | ||
"                else:\n", | ||
"                    print(f\"=== NO COURSE FOUND FOR {name} ===\")\n", | ||
"                people_by_email[email_no_dot] = person\n", | ||
"            else:\n", | ||
"                _apply_bio(person, name, use_pref_name, pronouns, course, role, bio, web_url)\n", | ||
"\n", | ||
"    with open(IMG_PATH, newline=\"\", encoding=\"utf-8\") as f:\n", | ||
"        for row in csv.reader(f):\n", | ||
"            if not row:\n", | ||
"                continue  # blank line\n", | ||
"            _, url, _, email = row\n", | ||
"            email_no_dot = _normalize_email(email)\n", | ||
"            # The id is taken from the second-to-last path segment of the URL\n", | ||
"            url_parts = url.split(\"/\")\n", | ||
"            if len(url_parts) < 2:\n", | ||
"                print(f\"=== WARNING: UNRECOGNIZED PHOTO URL FOR {email.strip()}: {url!r} ===\")\n", | ||
"                continue\n", | ||
"            fixed_url = \"https://drive.google.com/open?id=\" + url_parts[-2]\n", | ||
"            for table in (exec_roles, exec_bios, people_by_email):\n", | ||
"                if email_no_dot in table:\n", | ||
"                    table[email_no_dot][\"imgUrl\"] = fixed_url\n", | ||
"\n", | ||
"    # filter exec from people_by_email\n", | ||
"    for person in people_by_email.values():\n", | ||
"        person.get(\"courses\", {}).pop(\"exec\", None)\n", | ||
"    return people_by_email" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 59, | ||
"id": "158777d8-f9bd-4d93-a34b-7ade94d7d276", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Parsing bios...\n", | ||
"Dumping jsons...\n", | ||
"Done!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def _write_json(path, records):\n", | ||
"    \"\"\"\n", | ||
"    Writes records to path as JSON indented by 4 spaces, replacing any existing file.\n", | ||
"    \"\"\"\n", | ||
"    with open(path, \"w\", encoding=\"utf-8\") as out:\n", | ||
"        json.dump(records, out, indent=4)\n", | ||
"\n", | ||
"\n", | ||
"print(\"Parsing bios...\")\n", | ||
"# parse_bios also fills exec_roles and exec_bios, which are dumped below\n", | ||
"people_by_email = parse_bios(BIOS_PATH, ROSTER_PATH)\n", | ||
"print(\"Dumping jsons...\")\n", | ||
"# Only the values are written; the normalized-email keys are dropped.\n", | ||
"# Write mentor bios\n", | ||
"_write_json(DEST_PATH, list(people_by_email.values()))\n", | ||
"# Write the exec team for the current semester\n", | ||
"_write_json(f\"../src/data/team/{CURR_SEMESTER}.json\", list(exec_roles.values()))\n", | ||
"# Write exec bios\n", | ||
"_write_json(\"../src/data/bios/exec.json\", list(exec_bios.values()))\n", | ||
"print(\"Done!\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "33165cc6-b03e-4ebd-9c78-4d72659d8e14", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6b886ae9-56dc-423e-9776-c5e65070c33e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters