recodehive · sanjay-kv · Oct 16, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
 catboost_info/
+venv/
+.venv/
-streamlit\__pycache__
+# Use a forward slash: a backslash in .gitignore escapes the next character.
+streamlit/__pycache__/
diff --git a/streamlit/__pycache__/functions.cpython-312.pyc b/streamlit/__pycache__/functions.cpython-312.pyc
diff --git a/streamlit/__pycache__/main_analysis.cpython-312.pyc b/streamlit/__pycache__/main_analysis.cpython-312.pyc
diff --git a/streamlit/functions.py b/streamlit/functions.py
@@ -20,6 +20,21 @@
 results = pd.read_csv("https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/results.csv")
 
 
+# Alternative: load the data from local CSV files (disabled; every line below is commented out)
+# data = pd.read_csv('df2020.csv')
+# df2018 = pd.read_csv('df2018.csv')
+# full_data2018 = pd.read_csv('survey_results_sample_2018.csv')
+# full_data2019 = pd.read_csv('survey_results_sample_2019.csv')
+# full_df2020 = pd.read_csv('survey_results_sample_2020.csv')
+# df2019 = pd.read_csv('df2019.csv')
+
+# # Filter the 2020 data
+# df2020 = data[(data['SalaryUSD'] < 200000)]
+
+# # Load results for job satisfaction from the local file
+# results = pd.read_csv("results.csv")
+
+
 #######################################
 # VISUALISATION STARTS
 #######################################
@@ -35,7 +50,7 @@ def plot_boxplot(data, x, y, title):
 
 #########################################################################    
 
-def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700):
+def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700, key=None):
     df_counts = df[column_name].value_counts().head(top_n).reset_index()
     df_counts.columns = [column_name, 'Count']
 
@@ -46,7 +61,7 @@ def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700):
     fig.update_layout(xaxis_title=column_name, yaxis_title='Number of Developers')
     fig.update_layout(height=height, width=width)
 
-    return st.plotly_chart(fig)
+    return st.plotly_chart(fig, key=key)
 
 
 def plot_pie_plotly(df, column_name,top_n=10,  height=400, width=400 ):

diff --git a/streamlit/home.py b/streamlit/home.py
@@ -16,14 +16,24 @@ def load_data(url):
     return pd.read_csv(url)
 
 # Loading data files from the 'streamlit' directory
-df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
-df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
-full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
-full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
-full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
-df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
-df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
-df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
+# df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
+# df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
+# full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
+# full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
+# full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
+# df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
+# df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
+# df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
+
+df = load_data('df2020.csv')
+df2018 = load_data('df2018.csv')
+full_data2018 = load_data('survey_results_sample_2018.csv')
+full_data2019 = load_data('survey_results_sample_2019.csv')
+full_df2020 = load_data('survey_results_sample_2020.csv')
+df2019 = load_data('df2019.csv')
+df2021 = load_data('df2021.csv')
+df2022 = load_data('df2022.csv')
+
 
 # Filter the 2020 dataframe
 df2020 = df[df['SalaryUSD'] < 200000]
@@ -136,7 +146,7 @@ def plot_value_counts(column_name):
 
 if year == '2018':
     main.main_analysis(df2018)
-    main.main_analysis_2(df2018)
+    main.main_analysis_2(df2018, year)
 
     visual, analysis = st.columns((3, 1))
     with visual:
@@ -209,7 +219,7 @@ def plot_value_counts(column_name):
 
 elif year == '2019':
     main.main_analysis(df2019)
-    main.main_analysis_2(df2019)
+    main.main_analysis_2(df2019, year)
 
     visual, analysis = st.columns((3, 1))
     with visual:
@@ -228,7 +238,7 @@ def plot_value_counts(column_name):
 
 elif year == '2020':
     main.main_analysis(df2020)
-    main.main_analysis_2(df2020)
+    main.main_analysis_2(df2020, year)
 
     visual, analysis = st.columns((3, 1))
     with visual:
@@ -247,7 +257,7 @@ def plot_value_counts(column_name):
 
 elif year == '2021':
     main.main_analysis(df2021)
-    main.common_analysis_2021_2022(df2021)
+    main.common_analysis_2021_2022(df2021, year)
     visual, analysis = st.columns((3, 1))
     with visual:
         fig = func.plot_valuecounts_plotly(df2021,'NEWStuck')
@@ -281,7 +291,7 @@ def plot_value_counts(column_name):
 
 else:
     main.main_analysis(df2022)
-    main.common_analysis_2021_2022(df2022)
+    main.common_analysis_2021_2022(df2022, year)
 
     fig = func.compare_language_columns_and_plot(df2022, 'OpSysPersonal use', 'OpSysProfessional use')
 

diff --git a/streamlit/main_analysis.py b/streamlit/main_analysis.py
@@ -103,7 +103,7 @@ def main_analysis(df):
 
     with visual:
         st.title("Ethnicity VS Participation")
-        ff.plot_bar_plotly(df, 'Ethnicity')
+        ff.plot_bar_plotly(df, 'Ethnicity', key='ethnicity_plot')
 
     with analysis:
         Ethnicity_text = """
@@ -176,7 +176,7 @@ def main_analysis(df):
     if ds is not None: 
         with visual:
             st.title("Country Wise Data Scientists Participation")
-            ff.plot_bar_plotly(ds, "Country")
+            ff.plot_bar_plotly(ds, "Country", key='country_plot')
 
         with analysis:     
             data_scientist_participation_text = """
@@ -196,7 +196,7 @@ def main_analysis(df):
 ##### To Speed Up the Web Page, Main Analysis is divided into 2 ######
 
 
-def main_analysis_2(df):
+def main_analysis_2(df, year):
     visual2, analysis2 = st.columns((3,1))
 
     if df is df2019:
@@ -262,7 +262,8 @@ def main_analysis_2(df):
     if ds is not None: 
         with visual2:
             st.title("Country Wise Data Scientists Participation")
-            ff.plot_bar_plotly(ds, "Country")
+            # Year-suffixed key keeps this chart's key distinct from main_analysis's 'country_plot'
+            ff.plot_bar_plotly(ds, "Country", key=f'country_plot_{year}')
 
         with analysis2:     
             data_scientist_participation_text = """
@@ -368,7 +369,7 @@ def common_analysis_2021_2022(df):
         st.markdown(employment_text, unsafe_allow_html=True)
 
     with visual3:
-        ff.plot_bar_plotly(df, 'DevType', top_n=10, height=500, width=1000)
+        ff.plot_bar_plotly(df, 'DevType', top_n=10, height=500, width=1000, key=f'devtype_plot_{year}')  # NOTE(review): requires this function to accept `year` (home.py passes it) - confirm the def was updated
 
     with analysis3:
         devtype_text = """
@@ -429,4 +430,3 @@ def common_analysis_2021_2022(df):
         </div>
     """
         st.markdown(webframe_text, unsafe_allow_html=True)
-