Implemented Caching in loaded datasets for Improved Performance

recodehive · Oct 5, 2024 · dd41b12 · dd41b12
1 parent 821b1b4
commit dd41b12
Showing 1 changed file with 15 additions and 9 deletions.
diff --git a/streamlit/home.py b/streamlit/home.py
@@ -9,17 +9,21 @@
 # DATA LOADING
 #######################################
 
-st.set_page_config(layout='wide')
+st.set_page_config(layout='wide') 
+
+@st.cache_data # Caching data loading functions
+def load_data(url):
+    return pd.read_csv(url)
 
 # Loading data files from the 'streamlit' directory
-df = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
-df2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
-full_data2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
-full_data2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
-full_df2020 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
-df2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
-df2021 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
-df2022 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
+df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
+df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
+full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
+full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
+full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
+df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
+df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
+df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
 
 # Filter the 2020 dataframe
 df2020 = df[df['SalaryUSD'] < 200000]
@@ -109,12 +113,14 @@
 }
 df_ai.replace(short_mapping, inplace=True)
 
+@st.cache_data
 def mean_salary(df):
     mean_salary = df[df['SalaryUSD'] <= 1000000]['SalaryUSD'].mean()
     df.loc[df['SalaryUSD'] > 1000000, 'SalaryUSD'] = mean_salary
     return df
 
 # Function to create value count plots for each column
+@st.cache_data
 def plot_value_counts(column_name):
     colors = ['skyblue', 'yellow']
     fig = px.bar(df_ai[column_name].value_counts().reset_index(), x='index', y=column_name, color_discrete_sequence=[random.choice(colors)])