-
-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Script to fetch OpenMeteo Data (NWP Forecast and Historical data) #93
base: main
Are you sure you want to change the base?
Changes from 2 commits
d87d589
be4a232
3bb5cf0
ddaa1b2
2494c84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import openmeteo_requests | ||
import requests_cache | ||
import pandas as pd | ||
from retry_requests import retry | ||
import numpy as np | ||
import xarray as xr | ||
from typing import Tuple, List | ||
|
||
class WeatherDataFetcher:
    """Fetch hourly Open-Meteo data on a regular latitude/longitude grid.

    The client is an ``openmeteo_requests.Client`` wrapped in a cached
    session (one hour expiry) with retry on error.
    """

    # Endpoint used when the caller does not pass ``url``. The original code
    # referenced an undefined ``url`` name; the forecast endpoint is assumed
    # here -- TODO confirm (the archive endpoint may be the right one for
    # dates in the past).
    DEFAULT_URL = "https://api.open-meteo.com/v1/forecast"

    def __init__(self):
        """Set up the Open-Meteo API client with cache and retry on error."""
        cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        self.openmeteo = openmeteo_requests.Client(session=retry_session)

    @staticmethod
    def _axis(bounds: Tuple[float, float], step: float) -> np.ndarray:
        """Return values from ``bounds[0]`` to ``bounds[1]`` inclusive, ``step`` apart."""
        if step <= 0:
            raise ValueError("step must be positive")
        low, high = bounds
        # Derive the point count first and multiply, instead of
        # ``np.arange(low, high + step, step)``: with a non-integer step the
        # length of such an arange is not reliable and it can overshoot
        # ``high``. The small epsilon absorbs float division error.
        count = max(int(np.floor((high - low) / step + 1e-9)) + 1, 0)
        return low + step * np.arange(count)

    def generate_lat_lon_grid(self, lat_range: Tuple[float, float] = (-90, 90), lon_range: Tuple[float, float] = (-180, 180), lat_step: float = 0.25, lon_step: float = 0.25) -> Tuple[np.ndarray, np.ndarray]:
        """Build the latitude and longitude axes of a regular grid.

        Args:
            lat_range: (min, max) latitude, both ends included.
            lon_range: (min, max) longitude, both ends included.
            lat_step: Spacing between latitudes.
            lon_step: Spacing between longitudes.

        Returns:
            A ``(latitudes, longitudes)`` tuple of 1-D arrays.

        Raises:
            ValueError: If a step is not positive.
        """
        return self._axis(lat_range, lat_step), self._axis(lon_range, lon_step)

    def fetch_world_grid_data(self, start_date: str, end_date: str, weather_variables: List[str], url: str = DEFAULT_URL, chunk_size: int = 200) -> "xr.Dataset":
        """Fetch hourly variables for every point of the default world grid.

        Args:
            start_date: First day to request, passed through as ``start_date``.
            end_date: Last day to request, passed through as ``end_date``.
            weather_variables: Hourly variable names to request.
            url: API endpoint to query (defaults to ``DEFAULT_URL``).
            chunk_size: Number of grid points sent per request.

        Returns:
            An ``xr.Dataset`` with one ``(latitude, longitude, date)`` variable
            per requested name. Points whose request failed stay NaN.

        Raises:
            ValueError: If ``chunk_size`` is not positive.
            RuntimeError: If no request returned any data.

        Note:
            The default 0.25 degree grid has 721 x 1441 points, so a full run
            issues thousands of requests and holds one float32 array of
            ``points x hours`` per variable in memory.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")

        latitudes, longitudes = self.generate_lat_lon_grid()

        # The request takes parallel latitude/longitude lists (point i is
        # latitude[i], longitude[i]), so the two axes are expanded into every
        # (lat, lon) combination rather than zipped chunk against chunk.
        lat_grid, lon_grid = np.meshgrid(latitudes, longitudes, indexing="ij")
        point_lats = lat_grid.ravel()
        point_lons = lon_grid.ravel()
        n_points = point_lats.size

        times = None
        values = {}  # variable name -> (n_points, n_times) array

        for start in range(0, n_points, chunk_size):
            stop = min(start + chunk_size, n_points)
            params = {
                "latitude": point_lats[start:stop].tolist(),
                "longitude": point_lons[start:stop].tolist(),
                "hourly": weather_variables,
                "start_date": start_date,
                "end_date": end_date,
            }
            try:
                responses = self.openmeteo.weather_api(url, params=params)
            except openmeteo_requests.OpenMeteoRequestsError as exc:
                # Best effort: report the failed chunk and keep going; its
                # grid points remain NaN in the result.
                print(f"Error: request for grid points {start}-{stop - 1} failed: {exc}")
                continue

            # NOTE(review): assumes responses come back in request order, one
            # per requested point -- confirm against the API documentation.
            for offset, response in enumerate(responses):
                hourly = response.Hourly()
                if times is None:
                    times = pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left",
                    )
                    values = {
                        var: np.full((n_points, len(times)), np.nan, dtype=np.float32)
                        for var in weather_variables
                    }
                # Variables are indexed in the order they were requested.
                for index, var in enumerate(weather_variables):
                    values[var][start + offset, :] = hourly.Variables(index).ValuesAsNumpy()

        if times is None:
            raise RuntimeError("No data could be fetched from the Open-Meteo API.")

        # Points were flattened latitude-major, so a plain reshape restores
        # the (latitude, longitude, date) grid.
        shape = (latitudes.size, longitudes.size, len(times))
        return xr.Dataset(
            {
                var: (["latitude", "longitude", "date"], flat.reshape(shape))
                for var, flat in values.items()
            },
            coords={"latitude": latitudes, "longitude": longitudes, "date": times},
        )
|
||
# Example usage: request four hourly variables for 2024-01-01 .. 2024-01-10
# through fetch_world_grid_data and print the resulting dataset.
fetcher = WeatherDataFetcher()
start_date = "2024-01-01"
end_date = "2024-01-10"
weather_variables = ["temperature_2m", "precipitation", "visibility", "cloud_cover"]
world_grid_data = fetcher.fetch_world_grid_data(
    start_date,
    end_date,
    weather_variables,
)
print(world_grid_data)
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,114 @@ | ||||||
import openmeteo_requests # Importing required libraries | ||||||
import requests_cache | ||||||
import pandas as pd | ||||||
from retry_requests import retry | ||||||
|
||||||
class WeatherDataFetcher:
    """Fetch forecast and historical hourly data from the Open-Meteo API.

    Review notes carried over from the PR discussion:
      * A more descriptive class name was requested; the name is kept here so
        existing callers keep working.
      * The NWP model must not be hard-coded and inputs should carry type
        hints (done in ``fetch_forecast_data``).
      * Extracted variables should be passed in as arguments, defaulting to
        all available ones (``process_hourly_data`` now takes
        ``variable_names``; discovering every available variable from the API
        is still open).
      * The reviewer would like an xarray Dataset returned instead of a
        DataFrame; the DataFrame return is kept for backward compatibility.
    """

    BASE_URL = "https://api.open-meteo.com/v1/"  # Base URL for OpenMeteo API
    ARCHIVE_URL = "https://archive-api.open-meteo.com/v1/archive"  # Historical data endpoint

    # Variables read when the caller does not name any; their order must match
    # the order of the "hourly" list sent with the request.
    DEFAULT_HOURLY_VARIABLES = (
        "temperature_2m",
        "relative_humidity_2m",
        "precipitation",
        "cloud_cover",
    )

    def __init__(self):
        """Set up the Open-Meteo API client with cache and retry on error."""
        cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        self.openmeteo = openmeteo_requests.Client(session=retry_session)

    def fetch_forecast_data(self, NWP: str, params: dict):
        """Fetch weather data for one location from the given NWP model.

        Args:
            NWP: Model endpoint name appended to ``BASE_URL`` (e.g. "gfs").
            params: Query parameters passed to the API client.

        Returns:
            The first response (a single location is assumed), or ``None``
            when the API reports an error or returns nothing.
        """
        url = f"{self.BASE_URL}{NWP}"  # Reuse the class constant instead of repeating the URL
        try:
            responses = self.openmeteo.weather_api(url, params=params)
        except openmeteo_requests.OpenMeteoRequestsError as e:
            if 'No data is available for this location' in str(e):
                print(f"Error: No data available for the location for model '{NWP}'.")
            else:
                print(f"Error: {e}")
            return None
        # Guard against an empty result, as fetch_historical_data does.
        return responses[0] if responses else None

    def fetch_historical_data(self, params: dict):
        """Fetch historical weather data for one location from the archive API.

        Args:
            params: Query parameters passed to the API client.

        Returns:
            The first response, or ``None`` when the request fails or returns
            nothing.
        """
        try:
            responses = self.openmeteo.weather_api(self.ARCHIVE_URL, params=params)
        except (openmeteo_requests.OpenMeteoRequestsError, ValueError) as e:
            # API errors are handled like in fetch_forecast_data so both
            # fetchers report failures the same way.
            print(f"Error: {e}")
            return None
        return responses[0] if responses else None

    def process_hourly_data(self, response, variable_names=None) -> pd.DataFrame:
        """Convert the hourly part of an API response into a DataFrame.

        Args:
            response: API response exposing ``Hourly()``.
            variable_names: Names of the hourly variables, in the order they
                were requested. Defaults to ``DEFAULT_HOURLY_VARIABLES``.

        Returns:
            A DataFrame with a ``date`` column followed by one column per
            variable.
        """
        if variable_names is None:
            names = list(self.DEFAULT_HOURLY_VARIABLES)
        else:
            names = list(variable_names)

        hourly = response.Hourly()

        # Time(), TimeEnd() and Interval() are read as seconds; the end is
        # excluded from the range.
        time_range = pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )

        hourly_data = {"date": time_range}
        # Variable i of the response corresponds to the i-th requested name.
        for index, name in enumerate(names):
            hourly_data[name] = hourly.Variables(index).ValuesAsNumpy()

        return pd.DataFrame(data=hourly_data)

    def print_location_info(self, response):
        """Print coordinates, elevation and timezone details of a response."""
        print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
        print(f"Elevation {response.Elevation()} m asl")
        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
|
||||||
|
||||||
def main():
    """Demonstrate WeatherDataFetcher with one forecast and one historical fetch.

    Review note from the PR discussion: this demo would be better placed in
    the tests folder as a pytest test, so it runs on every code change.
    """
    fetcher = WeatherDataFetcher()

    # Choose the NWP model.
    # NWP models = ["dwd-icon", "gfs", "ecmwf", "meteofrance", "jma", "metno", "gem", "bom", "cma"]
    NWP = "gfs"

    params = {
        "latitude": 40.77,  # Latitude of the location
        "longitude": -73.91,  # Longitude of the location
        "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "cloud_cover"],  # Variables to fetch
        "start_date": "2023-12-21",  # Start date for data
        "end_date": "2024-03-15"  # End date for data
    }

    # Forecast data: fetch_forecast_data returns None on failure (after
    # printing the error), so only use the response when there is one.
    response = fetcher.fetch_forecast_data(NWP, params)
    if response is not None:
        fetcher.print_location_info(response)
        gfs_dataframe = fetcher.process_hourly_data(response)
        print(gfs_dataframe)

    # Historical data: same None guard as above.
    history = fetcher.fetch_historical_data(params)
    if history is not None:
        history_dataframe = fetcher.process_hourly_data(history)
        print(history_dataframe)


if __name__ == "__main__":
    main()  # Call main function if script is executed directly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @jacobbieker,
I'm encountering an issue while creating an xarray dataset from the OpenMeteo data because of a dimension problem. I can successfully fetch data for multiple coordinates, but building the dataset still fails, even though the lengths of the dimensions are the same.
I'm planning to add an argument for NWP (Numerical Weather Prediction) if we need to specify a particular NWP in the function. What do you think about this approach?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this is a bit hard to debug from this, but if you add the `coord` latitude and longitude to each data point, that might then work to reshape into a grid? As for adding an argument to specify the NWP, that is perfect! We want to be able to access all the NWPs from OpenMeteo from this, so that would be ideal.