Skip to content

Commit b70fb0f

Browse files
kandersolarAdamRJensenadriesse
authored
Add function for accessing ERA5 (#2573)
* function, docs, tests * make api key secret accessible to tests * bit more docs * handle another API error * lint * fix test * fix tests, again * one more * use Timeout instead of Exception * Apply suggestions from code review Co-authored-by: Adam R. Jensen <39184289+AdamRJensen@users.noreply.github.com> * rename from ECMWF to ERA5 * and fix tests * make unit conversion funcs private * Apply suggestions from code review Co-authored-by: Anton Driesse <anton.driesse@pvperformancelabs.com> * convert input times to UTC if not localized * lint * fix test bug --------- Co-authored-by: Adam R. Jensen <39184289+AdamRJensen@users.noreply.github.com> Co-authored-by: Anton Driesse <anton.driesse@pvperformancelabs.com>
1 parent 90af1f2 commit b70fb0f

File tree

6 files changed

+326
-0
lines changed

6 files changed

+326
-0
lines changed

docs/sphinx/source/reference/iotools.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,17 @@ lower quality.
234234
iotools.read_crn
235235

236236

237+
ECMWF ERA5
238+
^^^^^^^^^^
239+
240+
A global reanalysis dataset providing weather and solar resource data.
241+
242+
.. autosummary::
243+
:toctree: generated/
244+
245+
iotools.get_era5
246+
247+
237248
Generic data file readers
238249
-------------------------
239250

docs/sphinx/source/whatsnew/v0.13.2.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ Enhancements
3939
:py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)
4040
* Accelerate :py:func:`~pvlib.pvsystem.singlediode` when scipy>=1.15 is
4141
installed. (:issue:`2497`, :pull:`2571`)
42+
* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
43+
ERA5 reanalysis data. (:pull:`2573`)
4244

4345

4446
Documentation

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@
4242
from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
4343
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
4444
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
45+
from pvlib.iotools.era5 import get_era5 # noqa: F401

pvlib/iotools/era5.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import requests
2+
import pandas as pd
3+
from io import BytesIO, StringIO
4+
import zipfile
5+
import time
6+
7+
8+
# Translation table from ERA5 variable names to pvlib column names.
# Every pvlib name is reachable from both the ERA5 short name and the ERA5
# long name. Entry order matters: get_era5 inverts this mapping, and because
# later entries overwrite earlier ones, each pvlib name resolves back to the
# ERA5 *long* name listed in the second group.
VARIABLE_MAP = {
    # ERA5 short names
    'd2m': 'temp_dew',
    't2m': 'temp_air',
    'sp': 'pressure',
    'ssrd': 'ghi',
    'tp': 'precipitation',

    # ERA5 long names (keep these after the short names)
    '2m_dewpoint_temperature': 'temp_dew',
    '2m_temperature': 'temp_air',
    'surface_pressure': 'pressure',
    'surface_solar_radiation_downwards': 'ghi',
    'total_precipitation': 'precipitation',
}
23+
24+
25+
def _same(x):
26+
return x
27+
28+
29+
def _k_to_c(temp_k):
30+
return temp_k - 273.15
31+
32+
33+
def _j_to_w(j):
34+
return j / 3600
35+
36+
37+
def _m_to_cm(m):
38+
return m / 100
39+
40+
41+
# Unit converter applied to each ERA5 short-name column by get_era5 when
# ``map_variables`` is true. Short names that are missing from this table
# are passed through unchanged by the caller.
UNITS = {
    # passed through unchanged
    'u100': _same,
    'v100': _same,
    'u10': _same,
    'v10': _same,
    # kelvin -> degrees Celsius
    'd2m': _k_to_c,
    't2m': _k_to_c,
    # passed through unchanged
    'msl': _same,
    # kelvin -> degrees Celsius
    'sst': _k_to_c,
    'skt': _k_to_c,
    # passed through unchanged
    'sp': _same,
    # joules accumulated over an hour -> watts
    'ssrd': _j_to_w,
    'strd': _j_to_w,
    # meters -> centimeters
    'tp': _m_to_cm,
}
56+
57+
58+
def get_era5(latitude, longitude, start, end, variables, api_key,
             map_variables=True, timeout=60,
             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
    """
    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.

    A CDS API key is needed to access this API. Register for one at [1]_.

    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
    the available variables. Data are available on a 0.25° x 0.25° grid.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First day of the requested period. Assumed to be UTC if not
        localized; localized values are converted to UTC before the
        calendar date is taken.
    end : datetime like or str
        Last day of the requested period. Assumed to be UTC if not
        localized; localized values are converted to UTC before the
        calendar date is taken.
    variables : list of str
        List of variable names to retrieve, for example
        ``['ghi', 'temp_air']``. Both pvlib and ERA5 names can be used.
        See [3]_ for additional options.
    api_key : str
        ECMWF CDS API key.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. Also converts units of some variables. See variable
        :const:`VARIABLE_MAP` and :const:`UNITS`.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become available
        before timeout. The same value is also passed as the timeout of
        each individual HTTP request.
    url : str, optional
        API endpoint URL.

    Raises
    ------
    requests.exceptions.Timeout
        If ``timeout`` is reached without the job finishing.
    requests.exceptions.HTTPError
        If polling the job, fetching its results, or downloading the
        dataset returns an HTTP error status.
    Exception
        If the API rejects the submitted request, or if the job finishes
        with status ``'failed'``.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the start of the interval.
    meta : dict
        Metadata.

    References
    ----------
    .. [1] https://cds.climate.copernicus.eu/
    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
    """  # noqa: E501

    def _to_utc(dt):
        # tz-aware inputs are shifted to UTC; naive inputs are left as-is,
        # i.e. interpreted as already being UTC
        dt = pd.to_datetime(dt)
        if dt.tzinfo is not None:
            dt = dt.tz_convert("UTC")
        return dt

    start = _to_utc(start).strftime("%Y-%m-%d")
    end = _to_utc(end).strftime("%Y-%m-%d")

    headers = {'PRIVATE-TOKEN': api_key}

    # allow variables to be specified with pvlib names. VARIABLE_MAP lists
    # the ERA5 long names last, so inverting it maps pvlib name -> long name
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]

    # Step 1: submit data request (add it to the queue)
    params = {
        "inputs": {
            "variable": variables,
            "location": {"longitude": longitude, "latitude": latitude},
            "date": [f"{start}/{end}"],
            "data_format": "csv"
        }
    }
    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers,
                             timeout=timeout)
    if not response.ok:
        # likely need to accept license. The error body is normally JSON,
        # but fall back to the raw text so a non-JSON error page does not
        # hide the real failure behind a JSON decoding error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception(detail)
    submission_response = response.json()

    job_id = submission_response['jobID']

    # Step 2: poll until the data request is ready
    slug = "jobs/" + job_id
    poll_interval = 1  # seconds between polls
    num_polls = 0
    while True:
        response = requests.get(url + slug, headers=headers, timeout=timeout)
        response.raise_for_status()
        job_status = response.json()['status']

        if job_status == 'successful':
            break  # ready to proceed to next step
        elif job_status == 'failed':
            msg = (
                'Request failed. Please check the ECMWF website for details: '
                'https://cds.climate.copernicus.eu/requests?tab=all'
            )
            raise Exception(msg)

        num_polls += 1
        if num_polls * poll_interval > timeout:
            raise requests.exceptions.Timeout(
                'Request timed out. Try increasing the timeout parameter or '
                'reducing the request size.'
            )

        time.sleep(poll_interval)

    # Step 3: get the download link for our requested dataset
    slug = "jobs/" + job_id + "/results"
    response = requests.get(url + slug, headers=headers, timeout=timeout)
    response.raise_for_status()
    results_response = response.json()
    download_url = results_response['asset']['value']['href']

    # Step 4: finally, download our dataset. it's a zipfile of one CSV
    response = requests.get(download_url, timeout=timeout)
    response.raise_for_status()
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        filename = archive.namelist()[0]
        csv_text = archive.read(filename).decode('utf-8')
    df = pd.read_csv(StringIO(csv_text))

    # and parse into the usual formats
    metadata = submission_response['metadata']  # include messages from ECMWF
    metadata['jobID'] = job_id
    if not df.empty:
        # coordinates as reported in the returned data, which may differ
        # from the requested ones
        metadata['latitude'] = df['latitude'].values[0]
        metadata['longitude'] = df['longitude'].values[0]

    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])

    if map_variables:
        # convert units (still keyed by ERA5 short names), then rename
        for shortname in df.columns:
            converter = UNITS.get(shortname, _same)
            df[shortname] = converter(df[shortname])
        df = df.rename(columns=VARIABLE_MAP)

    return df, metadata

tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,19 @@ def nrel_api_key():
130130
reason='requires solaranywhere credentials')
131131

132132

133+
try:
    # Attempt to load ECMWF API key used for testing
    # pvlib.iotools.get_era5
    ecmwf_api_key = os.environ["ECMWF_API_KEY"]
except KeyError:
    has_ecmwf_credentials = False
else:
    has_ecmwf_credentials = True
    # This name was originally misspelled ("ecwmf"); keep the old spelling
    # as an alias so any existing reference to it continues to work.
    ecwmf_api_key = ecmwf_api_key
140+
141+
# Marker that skips a test when no ECMWF API key is available in the
# environment (see has_ecmwf_credentials).
requires_ecmwf_credentials = pytest.mark.skipif(
    not has_ecmwf_credentials, reason='requires ECMWF credentials'
)
144+
145+
133146
try:
134147
import statsmodels # noqa: F401
135148
has_statsmodels = True

tests/iotools/test_era5.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""
2+
tests for pvlib/iotools/era5.py
3+
"""
4+
5+
import pandas as pd
6+
import pytest
7+
import pvlib
8+
import requests
9+
import os
10+
from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials
11+
12+
13+
@pytest.fixture
def params():
    """Keyword arguments for a small, single-day ``get_era5`` request."""
    return dict(
        latitude=40.01,
        longitude=-80.01,
        start='2020-06-01',
        end='2020-06-01',
        variables=['ghi', 'temp_air'],
        # read at fixture setup time; raises KeyError if the key is not set
        api_key=os.environ["ECMWF_API_KEY"],
    )
23+
24+
25+
@pytest.fixture
def expected():
    """Reference hourly values for the request described by ``params``."""
    hours = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59", freq="h",
                          tz="UTC")
    hours.name = 'valid_time'
    columns = {
        'temp_air': [
            16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4, 8.5,
            8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9, 21.5,
            21.6, 21., 20.7,
        ],
        'ghi': [
            153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
            427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1, 685.2,
            526.9, 331.4,
        ],
    }
    return pd.DataFrame(columns, index=hours)
38+
39+
40+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5(params, expected):
    """Default call returns the expected data and metadata."""
    data, meta = pvlib.iotools.get_era5(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False, atol=0.1)
    # metadata coordinates are compared against 40.0 / -80.0 even though
    # 40.01 / -80.01 was requested
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)
49+
50+
51+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timezone(params, expected):
    """Localized start/end inputs give the same result as naive ones."""
    for bound in ('start', 'end'):
        naive = pd.to_datetime(params[bound])
        params[bound] = naive.tz_localize('Etc/GMT+8')
    data, meta = pvlib.iotools.get_era5(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False, atol=0.1)
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)
62+
63+
64+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_map_variables(params, expected):
    """With map_variables=False, names and units are the raw ERA5 ones."""
    data, meta = pvlib.iotools.get_era5(**params, map_variables=False)
    raw_names = {'temp_air': 't2m', 'ghi': 'ssrd'}
    expected = expected.rename(columns=raw_names)
    # apply unit conversions manually
    data['t2m'] -= 273.15
    data['ssrd'] /= 3600
    pd.testing.assert_frame_equal(data, expected, check_freq=False, atol=0.1)
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)
76+
77+
78+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_error(params):
    """Requesting an unknown variable surfaces the failed-job error."""
    params['variables'] = ['nonexistent']
    expected_message = 'Request failed. Please check the ECMWF website'
    with pytest.raises(Exception, match=expected_message):
        pvlib.iotools.get_era5(**params)
86+
87+
88+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timeout(params):
    """A one-second timeout raises ``requests.exceptions.Timeout``."""
    expected_message = 'Request timed out. Try increasing'
    with pytest.raises(requests.exceptions.Timeout, match=expected_message):
        pvlib.iotools.get_era5(**params, timeout=1)

0 commit comments

Comments
 (0)