-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_app.py
155 lines (117 loc) · 5.49 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import streamlit as st
import altair as alt
import pandas as pd
from wordcloud import WordCloud
# Function to load and return the datasets
def load_data(path='Motor_Vehicle_Collisions_-_Crashes-2.csv'):
    """Load the collisions dataset from a CSV file.

    Args:
        path: Location of the CSV file; passed straight to
            ``pandas.read_csv``. Defaults to the crash export file name the
            app was written against (a relative path).

    Returns:
        A ``pandas.DataFrame`` with the parsed file contents.
    """
    return pd.read_csv(path)
# Function for Hourly Trend Chart
def create_hour_chart(data):
    """Build a bar chart of incident counts per hour of the day.

    Args:
        data: DataFrame with a 'CRASH TIME' column of 'HH:MM' values.
            The frame is not modified.

    Returns:
        An Altair bar chart with the hour on the x-axis and the number of
        rows for that hour on the y-axis.

    Raises:
        KeyError: If 'CRASH TIME' is missing from ``data``.
        ValueError: Propagated from ``pandas.to_datetime`` when a value does
            not match the '%H:%M' format.
    """
    # Derive the hour in a separate frame instead of overwriting
    # data['CRASH TIME']: writing back would change the caller's DataFrame
    # (a second call would then fail to parse the already-converted hours)
    # and triggers SettingWithCopyWarning when ``data`` is a filtered slice.
    crash_hours = pd.to_datetime(data['CRASH TIME'], format='%H:%M').dt.hour
    hourly = pd.DataFrame({'CRASH TIME': crash_hours})

    hour_chart = alt.Chart(hourly).mark_bar().encode(
        x=alt.X('CRASH TIME:O', axis=alt.Axis(title='Time of Day')),
        y=alt.Y('count():Q', axis=alt.Axis(title='Total Incidents')),
        tooltip=[
            alt.Tooltip('CRASH TIME', title='Hour'),
            alt.Tooltip('count()', title='Total Incidents'),
        ],
    ).properties(
        title='Incidents by Hour of the Day'
    )
    return hour_chart
# Function for Day-of-Week Trend Chart
def create_day_chart(data):
    """Build a bar chart of incident counts per day of the week.

    Args:
        data: DataFrame with a 'CRASH DATE' column that
            ``pandas.to_datetime`` can parse (no explicit format is given,
            so pandas infers it). The frame is not modified.

    Returns:
        An Altair bar chart with weekdays (Monday through Sunday) on the
        x-axis and the number of rows for each weekday on the y-axis.

    Raises:
        KeyError: If 'CRASH DATE' is missing from ``data``.
    """
    # Explicit ordering: an ordinal axis would otherwise sort the day names
    # alphabetically (Friday, Monday, Saturday, ...).
    weekday_order = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday',
        'Friday', 'Saturday', 'Sunday',
    ]

    # Compute the weekday in a separate frame so the caller's DataFrame is
    # left untouched (no overwritten 'CRASH DATE', no extra columns).
    crash_dates = pd.to_datetime(data['CRASH DATE'])
    by_day = pd.DataFrame({'DayOfWeek': crash_dates.dt.day_name()})

    day_chart = alt.Chart(by_day).mark_bar().encode(
        x=alt.X(
            'DayOfWeek:O',
            sort=weekday_order,
            axis=alt.Axis(title='Day of the Week'),
        ),
        y=alt.Y('count():Q', axis=alt.Axis(title='Total Incidents')),
        tooltip=[
            alt.Tooltip('DayOfWeek', title="Day of the Week"),
            alt.Tooltip('count()', title="Total Incidents"),
        ],
    ).properties(
        title='Incidents by Day of the Week'
    )
    return day_chart
# Function for Injury Chart
def create_injury_chart(data):
    """Build a horizontal bar chart of total injuries per borough.

    Args:
        data: DataFrame with 'BOROUGH' and 'NUMBER OF PERSONS INJURED'
            columns.

    Returns:
        An Altair bar chart: one bar per borough, bar length equal to the
        sum of 'NUMBER OF PERSONS INJURED', coloured by borough.
    """
    injuries_axis = alt.X(
        'sum(NUMBER OF PERSONS INJURED):Q',
        axis=alt.Axis(title='Total Number of Injuries'),
    )
    borough_axis = alt.Y('BOROUGH:N', axis=alt.Axis(title='Borough'))
    hover_fields = [
        alt.Tooltip('BOROUGH', title="Borough"),
        alt.Tooltip('sum(NUMBER OF PERSONS INJURED)', title="Total Injuries"),
    ]

    bars = alt.Chart(data).mark_bar()
    encoded = bars.encode(
        x=injuries_axis,
        y=borough_axis,
        color='BOROUGH:N',
        tooltip=hover_fields,
    )
    return encoded.properties(title='Injury Counts by Borough')
# Function for Borough Incident Count Chart
def create_borough_chart(data):
    """Build a horizontal bar chart of incident counts per borough.

    Args:
        data: DataFrame with a 'BOROUGH' column; each row counts as one
            incident.

    Returns:
        An Altair bar chart: one bar per borough, bar length equal to the
        number of rows for that borough, coloured by borough.
    """
    count_axis = alt.X('count():Q', axis=alt.Axis(title='Incident Count'))
    borough_axis = alt.Y('BOROUGH:N', axis=alt.Axis(title='Borough'))
    hover_fields = [
        alt.Tooltip('BOROUGH', title="Borough"),
        alt.Tooltip('count()', title="Total Incidents"),
    ]

    bars = alt.Chart(data).mark_bar()
    encoded = bars.encode(
        x=count_axis,
        y=borough_axis,
        color='BOROUGH:N',
        tooltip=hover_fields,
    )
    return encoded.properties(title='Incident Counts by Borough')
# Function for Heatmap
def create_heatmap(data):
    """Build a heatmap of injury incidents by vehicle type and borough.

    Only rows with a non-missing 'VEHICLE TYPE CODE 1' and more than zero
    'NUMBER OF PERSONS INJURED' are counted, and only (vehicle type,
    borough) pairs with more than 5 such rows are drawn.

    Args:
        data: DataFrame with 'VEHICLE TYPE CODE 1', 'BOROUGH' and
            'NUMBER OF PERSONS INJURED' columns. The frame is not modified.

    Returns:
        An Altair rect chart with boroughs on the x-axis, vehicle types on
        the y-axis and cell colour encoding the incident count.
    """
    # Filter, then count rows per (vehicle type, borough) pair.
    pair_counts = (
        data.dropna(subset=['VEHICLE TYPE CODE 1'])
        .loc[lambda frame: frame['NUMBER OF PERSONS INJURED'] > 0]
        .groupby(['VEHICLE TYPE CODE 1', 'BOROUGH'])
        .size()
        .reset_index(name='Incidents')
    )
    # Keep only pairs with more than 5 incidents.
    frequent_pairs = pair_counts.loc[pair_counts['Incidents'] > 5]

    colour_scale = alt.Color(
        'Incidents:Q',
        scale=alt.Scale(scheme='greenblue'),
        legend=alt.Legend(title="Number of Incidents"),
    )
    hover_fields = [
        alt.Tooltip('BOROUGH', title="Borough"),
        alt.Tooltip('VEHICLE TYPE CODE 1', title="Vehicle Type"),
        alt.Tooltip('Incidents', title="Total Incidents"),
    ]

    cells = alt.Chart(frequent_pairs).mark_rect()
    encoded = cells.encode(
        x=alt.X('BOROUGH:N', axis=alt.Axis(title='Borough')),
        y=alt.Y('VEHICLE TYPE CODE 1:N', axis=alt.Axis(title='Vehicle Type')),
        color=colour_scale,
        tooltip=hover_fields,
    )
    return encoded.properties(
        title='Heatmap of Incidents by Vehicle Type and Borough'
    )
# Function for Word Cloud
def create_word_cloud(data):
    """Render a word cloud of contributing factors and save it as a PNG.

    Only the 'CONTRIBUTING FACTOR VEHICLE 1' column is used; missing values
    are treated as empty strings.

    Args:
        data: DataFrame with a 'CONTRIBUTING FACTOR VEHICLE 1' column of
            strings.

    Returns:
        The path of the written image, always 'word_cloud.png' (relative to
        the current working directory; an existing file is overwritten).
    """
    output_path = 'word_cloud.png'

    # One text entry per row, then a single space-separated corpus.
    factor_per_row = (
        data[['CONTRIBUTING FACTOR VEHICLE 1']]
        .fillna('')
        .agg(' '.join, axis=1)
    )
    corpus = ' '.join(factor_per_row)

    # 800x800 white canvas; an empty stop-word set is passed explicitly.
    cloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        stopwords=set(),
        min_font_size=10,
    )
    cloud.generate(corpus)
    cloud.to_file(output_path)
    return output_path
def main():
    """Render the Streamlit page: title, borough filter, charts, word cloud.

    Loads the dataset, lets the user pick boroughs through a multi-select
    widget, and draws every chart plus the word cloud from the filtered
    rows. With nothing selected, the full dataset is used.
    """
    st.title("NYC Motor Vehicle Collisions")

    collisions = load_data()

    # Borough filter; an empty selection means "no filter".
    chosen = st.multiselect('Select Boroughs', options=collisions['BOROUGH'].unique())
    filtered_data = collisions[collisions['BOROUGH'].isin(chosen)] if chosen else collisions

    # Build and display each chart in turn, in page order.
    chart_builders = (
        create_hour_chart,
        create_day_chart,
        create_injury_chart,
        create_borough_chart,
        create_heatmap,
    )
    for build_chart in chart_builders:
        st.altair_chart(build_chart(filtered_data), use_container_width=True)

    # Word cloud is written to disk and displayed from the saved file.
    image_path = create_word_cloud(filtered_data)
    st.image(image_path, caption='Word Cloud', use_column_width=True)


if __name__ == "__main__":
    main()