uwblueprint · EdmondLi1 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 19, 2024
diff --git a/backend/matching/data/config.py b/backend/matching/data/config.py
@@ -1,7 +1,122 @@
 # config.py
 
 # Constants for output format choices
-OUTPUT_FORMAT_CHOICES = ["dataframe", "csv", "json", "excel"]
+OUTPUT_FORMAT_CHOICES = ["dataframe", "csv", "json", "excel", "db"]
 
 # Constants for formats that require a file path
 FILE_PATH_REQUIRED_FORMATS = ["csv", "json", "excel"]
+
+OPTIONS_FOR_DATA = ["participant", "volunteer", "matching"]
+
+# Constants for the different types of data that can be generated
+
+############################################################################################################
+### DEMOGRAPHICS DATA
+############################################################################################################
+# Shared answer options for yes/no questions
+YES_NO = ["Yes", "No"]
+# Small fixed pools of sample names
+FIRST_NAMES = ["John", "Jane", "Alex", "Taylor", "Sam", "Chris"]
+LAST_NAMES = ["Doe", "Smith", "Lee", "Patel", "Brown", "Garcia"]
+# The 10 Canadian provinces and 3 territories, in alphabetical order
+PROVINCES = [
+    "Alberta",
+    "British Columbia",
+    "Manitoba",
+    "New Brunswick",
+    "Newfoundland and Labrador",
+    "Northwest Territories",
+    "Nova Scotia",
+    "Nunavut",
+    "Ontario",
+    "Prince Edward Island",
+    "Quebec",
+    "Saskatchewan",
+    "Yukon",
+]
+LANGUAGES = ["English", "French"]
+# Gender identity answer options
+GENDER_IDENTITIES = [
+    "Female",
+    "Male",
+    "Non-binary",
+    "Prefer not to answer",
+    "Prefer to self-describe",  # TODO: decide how to accommodate free-text 'other' options like this one
+]
+# Pronoun answer options
+PRONOUNS = ["she/her", "he/him", "they/them", "other"]
+# Ethnic background answer options
+ETHNIC_GROUPS = [
+    "Black (including African and Caribbean descent)",
+    "Middle Eastern, Western or Central Asian",
+    "Chinese",
+    "East Asian, excluding Chinese",
+    "Indigenous person from Canada",
+    "Latin American",
+    "South Asian",
+    "Southeast Asian",
+    "White/Caucasian",
+    "Mixed ethnicity",
+    "Prefer not to answer",
+]
+# Marital status answer options
+MARITAL_STATUSES = ["Single", "Married/Common Law", "Divorced", "Widowed"]
+
+
+############################################################################################################
+### MEDICAL INFORMATION DATA
+############################################################################################################
+# Caregiving type answer options
+CAREGIVING_TYPES = [
+    "Parent",
+    "Sibling",
+    "Child",
+    "Spouse",
+    "Friend",
+    "Other",  # TODO: decide how to implement this when the user chooses "Other"
+]
+# Diagnoses grouped by category: maps a category label to the list of
+# diagnosis names that belong to it.
+DIAGNOSES = {
+    "Unknown": ["Unknown"],
+    "Category 1": [
+        "Acute Myeloid Leukemia",
+        "Acute Lymphoblastic Leukemia",
+        "Acute Promyelocytic leukemia",
+        "Mixed Phenotype Leukemia",
+    ],
+    "Category 2": [
+        "Chronic Lymphocytic Leukemia / Small Lymphocytic Lymphoma",
+        "Chronic Myeloid Leukemia",
+        "Hairy Cell Leukemia",
+    ],
+    "Category 3": [
+        "Myeloma",
+        # spelling fixed ("Hodgin’s" -> "Hodgkin’s") to agree with the entries below
+        "Hodgkin’s Lymphoma",
+        "Indolent/low grade Non-Hodgkin’s Lymphoma",
+        "Aggressive/high grade Non-Hodgkin’s Lymphoma",
+    ],
+    "Category 4": ["Low risk MDS", "High Risk MDS"],
+    "Category 5": ["Myelofibrosis", "Essential Thrombocythemia", "Polycythemia Vera"],
+}
+
+# Treatment answer options
+TREATMENTS = [
+    "Unknown",
+    "Watch and Wait / Active Surveillance",
+    "Chemotherapy/immunotherapy",
+    "Oral Chemotherapy",
+    "Radiation",
+    "Maintenance chemotherapy",
+    "Palliative care",
+    "Transfusions",
+    "Autologous Stem Cell Transplant",
+    "Allogeneic Stem cell Transplant",
+    "Haplo Stem Cell Transplant",
+    "CAR-T",
+]
+
+# Experience answer options
+EXPERIENCES = [
+    "Brain Fog",
+    "Fatigue",
+    "Fertility Issues",
+    "Graft vs Host",
+    "Returning to work after/during treatment",
+    "Returning to school after/during treatment",
+    "Speaking to your children about diagnosis",
+    "Speaking to your family or friends about diagnosis",
+    "Relapse",
+    "Anxiety",
+    "Depression",
+    "PTSD",
+    "Side effects from treatment",
+]
diff --git a/backend/matching/data/data_category/demographics.py b/backend/matching/data/data_category/demographics.py
@@ -1,67 +1,114 @@
 import random
+import datetime
+import re
+from faker import Faker
+from backend.matching.data.config import (
+    PROVINCES,
+    LANGUAGES,
+    GENDER_IDENTITIES,
+    PRONOUNS,
+    ETHNIC_GROUPS,
+    MARITAL_STATUSES,
+    YES_NO,
+)
 
+fake = Faker()  # generic faker
+fake_ca = Faker("en_CA")  # canadian faker
 
+
+# TODO: this uses random and datetime; we can use a seeder like Faker to generate more realistic data
+# NOTE: fields relevant to the matching algorithm are highlighted with an "IMPORTANT" comment
 class Demographics:
-    # TODO: Add more roles and diagnoses (as we go and finalize the survey)
-    # TODO: can we move these field paramaters to a constants file?
-
-    GENDER_IDENTITIES = [
-        "Female",
-        "Male",
-        "Non-binary",
-        "Prefer not to answer",
-        "Prefer to self-describe",
-    ]
-    ETHNIC_GROUPS = [
-        "Black (including African and Caribbean descent)",
-        "Middle Eastern",
-        "East Asian",
-        "South Asian",
-        "Southeast Asian",
-        "Indigenous person from Canada",
-        "Latin American",
-        "White",
-        "Mixed ethnicity",
-        "Prefer not to answer",
-        "Another background/Prefer to self describe",
-    ]
-    PRONOUNS = [
-        "He/Him",
-        "She/Her",
-        "They/Them",
-        "Ze/Hir",
-        "Prefer not to answer",
-        "Other",
-    ]
-    CITIES = [
-        "Toronto",
-        "Vancouver",
-        "Montreal",
-        "Calgary",
-        "Ottawa",
-        "Edmonton",
-        "Winnipeg",
-        "Quebec City",
-        "Hamilton",
-        "Halifax",
-    ]
+    @staticmethod
+    def get_random_first_name():
+        """Return a random first name from the module-level ``fake`` instance.
+
+        Temporary helper.
+        """
+        first_name = fake.first_name()
+        return first_name
+
+    # temporary function to get random last name
+    @staticmethod
+    def get_random_last_name():
+        """Return a random last name from the module-level ``fake`` instance."""
+        last_name = fake.last_name()
+        return last_name
+
+    @staticmethod
+    def get_random_date_of_birth(min_age=18, max_age=90, today=None):
+        """Return a date of birth for a random age, formatted as ``YYYY-MM-DD``.
+
+        The result keeps the reference date's month and day and moves the year
+        back by a random whole number of years in ``[min_age, max_age]``.
+
+        Args:
+            min_age: Smallest age in years (inclusive).
+            max_age: Largest age in years (inclusive).
+            today: Reference ``datetime.date``; defaults to the current date.
+
+        Returns:
+            The date of birth as a ``"%Y-%m-%d"`` string.
+        """
+        if today is None:
+            today = datetime.date.today()
+        age = random.randint(min_age, max_age)
+        birth_year = today.year - age
+        try:
+            dob = today.replace(year=birth_year)
+        except ValueError:
+            # Feb 29 has no counterpart in a non-leap year; fall back to Feb 28
+            dob = today.replace(year=birth_year, day=28)
+        return dob.strftime("%Y-%m-%d")
+
+    # temporary function to get random email
+    # TODO: not needed for matching algorithm; can use Faker here to emulate better response
+    @staticmethod
+    def get_random_email():
+        """Return a random email address from the module-level ``fake`` instance."""
+        email = fake.email()
+        return email
+
+    # TODO: not needed for matching algorithm; can use Faker here to emulate better response
+    @staticmethod
+    def get_random_phone():
+        """Return a random phone number from the ``en_CA`` Faker instance."""
+        phone_number = fake_ca.phone_number()
+        return phone_number
+
+    # TODO: not needed for matching algorithm; can use Faker here to emulate better response
+    @staticmethod
+    def get_random_postal_code():
+        """Return a random postal code in the ``A0A 0A0`` layout.
+
+        Each letter slot of the template is replaced by a random letter and
+        each digit slot by a random digit; the space is kept as-is.
+        """
+        # D, F, I, O, Q and U are never used; the first position also excludes W and Z
+        first_letters = "ABCEGHJKLMNPRSTVXY"
+        other_letters = "ABCEGHJKLMNPRSTVWXYZ"
+        digits = "0123456789"
+
+        def fill(match):
+            # choose the character pool from the slot's position and kind
+            if match.start() == 0:
+                pool = first_letters
+            elif match.group().isalpha():
+                pool = other_letters
+            else:
+                pool = digits
+            return random.choice(pool)
+
+        return re.sub(r"[A-Z]|\d", fill, "A0A 0A0")
 
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_province():
+        """Return one entry of ``PROVINCES`` chosen at random."""
+        # TODO: the Canadian Faker data could probably supply provinces as well
+        province = random.choice(PROVINCES)
+        return province
+
+    # temporary function to get random city
+    @staticmethod
+    def get_random_city():
+        """Return a random city name from the ``en_CA`` Faker instance."""
+        # only the cities come from the Canadian localized data
+        city = fake_ca.city()
+        return city
+
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_language():
+        """Return one entry of ``LANGUAGES`` chosen at random."""
+        language = random.choice(LANGUAGES)
+        return language
+
+    # IMPORTANT: for matching algo
     @staticmethod
     def get_random_gender_identity():
-        return random.choice(Demographics.GENDER_IDENTITIES)
+        return random.choice(GENDER_IDENTITIES)
+
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_pronouns():
+        """Return one entry of ``PRONOUNS`` chosen at random."""
+        pronouns = random.choice(PRONOUNS)
+        return pronouns
 
+    # IMPORTANT: for matching algo
     @staticmethod
     def get_random_ethnic_background():
-        return random.sample(Demographics.ETHNIC_GROUPS, k=random.randint(1, 3))
+        return random.choice(ETHNIC_GROUPS)
 
+    # IMPORTANT: for matching algo
     @staticmethod
-    def get_random_age(min_age=18, max_age=90):
-        return random.randint(min_age, max_age)
+    def get_random_marital_status():
+        return random.choice(MARITAL_STATUSES)
 
+    # IMPORTANT: for matching algo
     @staticmethod
-    def get_random_pronouns():
-        return random.choice(Demographics.PRONOUNS)
+    def get_random_children_status():
+        return random.choice(YES_NO)
 
+    #### FOR THE VOLUNTEER QUESTIONS
     @staticmethod
-    def get_random_city():
-        return random.choice(Demographics.CITIES)
+    def get_criminal_record_status():
+        return random.choice(YES_NO)
diff --git a/backend/matching/data/data_category/medical_information.py b/backend/matching/data/data_category/medical_information.py
@@ -0,0 +1,55 @@
+import random
+from datetime import datetime, timedelta
+from backend.matching.data.config import (
+    DIAGNOSES,
+    TREATMENTS,
+    EXPERIENCES,
+    YES_NO,
+    CAREGIVING_TYPES,
+)
+
+
+class MedicalInformation:
+    """Random generators for the medical-information fields.
+
+    Every method is a static method; the answer options are the lists
+    imported from ``backend.matching.data.config``.
+    """
+
+    # TODO: Add more roles and diagnoses (as we go and finalize the survey)
+
+    @staticmethod
+    def get_random_blood_cancer_question():
+        """Return a random entry of ``YES_NO`` for the blood-cancer question."""
+        return random.choice(YES_NO)
+
+    @staticmethod
+    def get_random_caregiver_question():
+        """Return a random entry of ``YES_NO`` for the caregiver question."""
+        return random.choice(YES_NO)
+
+    @staticmethod
+    def get_random_caregiver_type():
+        """Return a random entry of ``CAREGIVING_TYPES``."""
+        return random.choice(CAREGIVING_TYPES)
+
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_diagnosis():
+        """Return a random diagnosis name.
+
+        A category key of ``DIAGNOSES`` is picked first, then one diagnosis
+        within it, so every category is equally likely regardless of how
+        many diagnoses it holds.
+        """
+        category = random.choice(list(DIAGNOSES))
+        return random.choice(DIAGNOSES[category])
+
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_date_of_diagnosis(start_year=1969, end_year=None):
+        """Return a random diagnosis date formatted as ``"<Month name> <year>"``.
+
+        Args:
+            start_year: First year of the range; dates start at January 1.
+            end_year: Last year of the range; dates run through December 31
+                of that year. When omitted, the range ends at the current
+                moment, so default calls never produce a future date and the
+                bound is evaluated per call instead of once at import time.
+
+        Returns:
+            The date rendered with ``strftime("%B %Y")``.
+
+        Raises:
+            ValueError: If the range is empty (the start is after the end).
+        """
+        start_date = datetime(start_year, 1, 1)
+        if end_year is None:
+            end_date = datetime.now()
+        else:
+            end_date = datetime(end_year, 12, 31)
+        span_days = (end_date - start_date).days
+        random_date = start_date + timedelta(days=random.randint(0, span_days))
+        return random_date.strftime("%B %Y")
+
+    # IMPORTANT: for matching algo
+    @staticmethod
+    def get_random_treatment():
+        """Return a random entry of ``TREATMENTS``."""
+        return random.choice(TREATMENTS)
+
+    # IMPORTANT: for matching algo
+    # there can be multiple experiences that they can select
+    @staticmethod
+    def get_random_experience():
+        """Return a random subset of ``EXPERIENCES`` as a list.
+
+        The subset size is uniform between 0 and ``len(EXPERIENCES)``, so the
+        result may be empty or contain every experience.
+        """
+        num_experiences = random.randint(0, len(EXPERIENCES))
+        return random.sample(EXPERIENCES, num_experiences)
diff --git a/backend/matching/data/data_category/personality.py b/backend/matching/data/data_category/personality.py