-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
72 lines (64 loc) · 2.09 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import json
import sqlite3
import urllib.request
# Endpoint queried for the incident list; the response is consumed below as a
# JSON array of incident objects.
url = "https://www.fire.ca.gov/api/sitecore/Incident/GetFiresForMap?showFeatured=false"

# NOTE(review): the contact address in the User-Agent reads "[email protected]",
# which looks like a redaction placeholder - confirm the intended value.
headers = {
    "User-Agent": "cafireshistorydb ([email protected])",
}

# Schema for the single table this script maintains. UniqueId is the primary
# key, which is what lets INSERT OR REPLACE act as an upsert.
_CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS incidents (
    UniqueId TEXT PRIMARY KEY,
    Name TEXT,
    Updated TEXT,
    Started TEXT,
    AdminUnit TEXT,
    County TEXT,
    Location TEXT,
    AcresBurned REAL,
    PercentContained REAL,
    Longitude REAL,
    Latitude REAL,
    Url TEXT,
    IsActive INTEGER
)
"""

_UPSERT_SQL = """
INSERT OR REPLACE INTO incidents (
    UniqueId, Name, Updated, Started, AdminUnit, County, Location,
    AcresBurned, PercentContained, Longitude, Latitude, Url, IsActive
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""


def fetch_incidents(api_url=url, request_headers=headers, timeout=30):
    """Download and decode the incident list.

    Args:
        api_url: URL to request.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The decoded JSON payload.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    request = urllib.request.Request(api_url, headers=request_headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read().decode("utf-8")
    return json.loads(body)


def _incident_row(item):
    """Return the parameter tuple for ``_UPSERT_SQL`` built from one incident."""
    return (
        item["UniqueId"],
        item["Name"],
        item["Updated"],
        item["Started"],
        item["AdminUnit"],
        item["County"],
        item["Location"],
        item["AcresBurned"],
        item["PercentContained"],
        item["Longitude"],
        item["Latitude"],
        item["Url"],
        # Store the flag as 0/1 to match the INTEGER column.
        1 if item["IsActive"] else 0,
    )


def save_incidents(conn, incidents):
    """Create the ``incidents`` table if needed and upsert every incident.

    The caller owns the transaction: this function neither commits nor
    closes ``conn``.

    Args:
        conn: An open ``sqlite3.Connection``.
        incidents: Iterable of mappings carrying the incident fields read by
            ``_incident_row``.

    Returns:
        The number of rows written.

    Raises:
        KeyError: If an incident lacks one of the expected fields.
    """
    cursor = conn.cursor()
    print("Creating incidents table if it doesn't exist")
    cursor.execute(_CREATE_TABLE_SQL)

    new_or_updated_count = 0
    for item in incidents:
        cursor.execute(_UPSERT_SQL, _incident_row(item))
        # INSERT OR REPLACE rewrites the row even when its values are
        # unchanged, so this counts every incident that was written rather
        # than only those whose data actually differed.
        if cursor.rowcount > 0:
            new_or_updated_count += 1
            print(f"Inserted or updated: {item['Name']} (ID: {item['UniqueId']})")
    return new_or_updated_count


def main():
    """Fetch the current incidents and store them in ``data/fires.db``."""
    print("Fetching data from API...")
    data = fetch_incidents()
    print(f"Received data for {len(data)} fire incidents")

    conn = sqlite3.connect("data/fires.db")
    try:
        # The connection's context manager commits on success and rolls back
        # on error, but it does not close the connection - hence the finally.
        with conn:
            new_or_updated_count = save_incidents(conn, data)
            print(f"Total rows inserted or updated: {new_or_updated_count}")
    finally:
        conn.close()
    print("Operation completed")


if __name__ == "__main__":
    main()