pvlib
diff --git a/‎docs/sphinx/source/reference/iotools.rst‎
Lines changed: 11 additions & 0 deletions b/‎docs/sphinx/source/reference/iotools.rst‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/sphinx/source/whatsnew/v0.13.2.rst‎
Lines changed: 2 additions & 0 deletions b/‎docs/sphinx/source/whatsnew/v0.13.2.rst‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pvlib/iotools/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎pvlib/iotools/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pvlib/iotools/era5.py‎
Lines changed: 205 additions & 0 deletions b/‎pvlib/iotools/era5.py‎
Lines changed: 205 additions & 0 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 13 additions & 0 deletions b/‎tests/conftest.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎tests/iotools/test_era5.py‎
Lines changed: 94 additions & 0 deletions b/‎tests/iotools/test_era5.py‎
Lines changed: 94 additions & 0 deletions
@@ -234,6 +234,17 @@ lower quality.
  iotools.read_crn
 
 
+ECMWF ERA5
+^^^^^^^^^^
+
+A global reanalysis dataset providing weather and solar resource data.
+
+.. autosummary::
+   :toctree: generated/
+
+   iotools.get_era5
+
+
 Generic data file readers
 -------------------------
 
 
@@ -39,6 +39,8 @@ Enhancements
  :py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)
 * Accelerate :py:func:`~pvlib.pvsystem.singlediode` when scipy>=1.15 is
  installed. (:issue:`2497`, :pull:`2571`)
+* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
+  ERA5 reanalysis data. (:pull:`2573`)
 
 
 Documentation
 
@@ -42,3 +42,4 @@
 from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
 from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
 from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
+from pvlib.iotools.era5 import get_era5 # noqa: F401
@@ -0,0 +1,205 @@
+import requests
+import pandas as pd
+from io import BytesIO, StringIO
+import zipfile
+import time
+
+
+# Mapping from ERA5 variable names (both the short and the long spelling) to
+# pvlib variable names. get_era5 uses it to rename the returned columns and,
+# inverted, to translate pvlib names in the ``variables`` argument.
+# Several keys share one value, so the inversion in get_era5 keeps the key
+# that appears last here: entry order matters.
+VARIABLE_MAP = {
+ # short names
+ 'd2m': 'temp_dew',
+ 't2m': 'temp_air',
+ 'sp': 'pressure',
+ 'ssrd': 'ghi',
+ 'tp': 'precipitation',
+
+ # long names
+ '2m_dewpoint_temperature': 'temp_dew',
+ '2m_temperature': 'temp_air',
+ 'surface_pressure': 'pressure',
+ 'surface_solar_radiation_downwards': 'ghi',
+ 'total_precipitation': 'precipitation',
+}
+
+
+def _same(x):
+    """
+    Return ``x`` unchanged.
+
+    Used as the converter for variables that need no unit conversion.
+    """
+    return x
+
+
+def _k_to_c(temp_k):
+    """Convert a temperature from kelvin to degrees Celsius."""
+    kelvin_offset = 273.15
+    return temp_k - kelvin_offset
+
+
+def _j_to_w(j):
+    """
+    Convert an energy in joules to a mean power in watts.
+
+    The value is divided by 3600, i.e. the energy is taken to have been
+    accumulated over one hour.
+    """
+    seconds_per_hour = 3600
+    return j / seconds_per_hour
+
+
+def _m_to_cm(m):
+    """
+    Convert a length from metres to centimetres.
+
+    One metre is 100 centimetres, so the value is multiplied by 100.
+    """
+    return m * 100
+
+
+# Unit-conversion function for each ERA5 short variable name. get_era5
+# applies these to the returned columns when ``map_variables`` is true;
+# a column whose name is not listed here is passed through unchanged.
+UNITS = {
+ 'u100': _same,
+ 'v100': _same,
+ 'u10': _same,
+ 'v10': _same,
+ 'd2m': _k_to_c,
+ 't2m': _k_to_c,
+ 'msl': _same,
+ 'sst': _k_to_c,
+ 'skt': _k_to_c,
+ 'sp': _same,
+ 'ssrd': _j_to_w,
+ 'strd': _j_to_w,
+ 'tp': _m_to_cm,
+}
+
+
+def get_era5(latitude, longitude, start, end, variables, api_key,
+             map_variables=True, timeout=60,
+             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
+    """
+    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.
+
+    A CDS API key is needed to access this API. Register for one at [1]_.
+
+    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
+    the available variables. Data are available on a 0.25° x 0.25° grid.
+
+    Parameters
+    ----------
+    latitude : float
+        In decimal degrees, north is positive (ISO 19115).
+    longitude : float
+        In decimal degrees, east is positive (ISO 19115).
+    start : datetime like or str
+        First day of the requested period. Assumed to be UTC if not localized.
+    end : datetime like or str
+        Last day of the requested period. Assumed to be UTC if not localized.
+    variables : list of str
+        List of variable names to retrieve, for example
+        ``['ghi', 'temp_air']``. Both pvlib and ERA5 names can be used.
+        A single name may also be passed as a plain string.
+        See [3]_ for additional options.
+    api_key : str
+        ECMWF CDS API key.
+    map_variables : bool, default True
+        When true, renames columns of the DataFrame to pvlib variable names
+        where applicable. Also converts units of some variables. See variable
+        :const:`VARIABLE_MAP` and :const:`UNITS`.
+    timeout : int, default 60
+        Number of seconds to wait for the requested data to become available
+        before timeout. The same value is used as the timeout of each
+        individual HTTP request.
+    url : str, optional
+        API endpoint URL. A missing trailing slash is added automatically.
+
+    Returns
+    -------
+    data : pd.DataFrame
+        Time series data. The index corresponds to the start of the interval.
+    meta : dict
+        Metadata.
+
+    Raises
+    ------
+    requests.exceptions.Timeout
+        If ``timeout`` is reached without the job finishing.
+    requests.exceptions.HTTPError
+        If fetching the results link or the data file returns an HTTP error.
+    Exception
+        If the request is rejected on submission, or if the job fails.
+
+    References
+    ----------
+    .. [1] https://cds.climate.copernicus.eu/
+    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
+    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
+    """  # noqa: E501
+
+    def _to_utc(dt):
+        # Only the calendar date is sent to the API. Localized inputs are
+        # converted to UTC first; naive inputs are used as given.
+        dt = pd.to_datetime(dt)
+        if dt.tzinfo is not None:
+            dt = dt.tz_convert("UTC")
+        return dt
+
+    start = _to_utc(start).strftime("%Y-%m-%d")
+    end = _to_utc(end).strftime("%Y-%m-%d")
+
+    # endpoints below are built by plain concatenation, so the base URL must
+    # end with a slash
+    if not url.endswith('/'):
+        url = url + '/'
+
+    headers = {'PRIVATE-TOKEN': api_key}
+
+    # a bare string would otherwise be iterated character by character
+    if isinstance(variables, str):
+        variables = [variables]
+
+    # allow variables to be specified with pvlib names
+    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
+    variables = [reverse_map.get(k, k) for k in variables]
+
+    # Step 1: submit data request (add it to the queue)
+    params = {
+        "inputs": {
+            "variable": variables,
+            "location": {"longitude": longitude, "latitude": latitude},
+            "date": [f"{start}/{end}"],
+            "data_format": "csv"
+        }
+    }
+    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
+    response = requests.post(url + slug, json=params, headers=headers,
+                             timeout=timeout)
+    if not response.ok:
+        # likely need to accept license. Report the server's explanation,
+        # falling back to the raw text if the error body is not JSON.
+        try:
+            detail = response.json()
+        except ValueError:
+            detail = response.text
+        raise Exception(detail)
+    submission_response = response.json()
+
+    job_id = submission_response['jobID']
+
+    # Step 2: poll until the data request is ready
+    slug = "jobs/" + job_id
+    poll_interval = 1  # seconds between status checks
+    num_polls = 0
+    while True:
+        response = requests.get(url + slug, headers=headers, timeout=timeout)
+        poll_response = response.json()
+        job_status = poll_response['status']
+
+        if job_status == 'successful':
+            break  # ready to proceed to next step
+        elif job_status == 'failed':
+            msg = (
+                'Request failed. Please check the ECMWF website for details: '
+                'https://cds.climate.copernicus.eu/requests?tab=all'
+            )
+            raise Exception(msg)
+
+        num_polls += 1
+        if num_polls * poll_interval > timeout:
+            raise requests.exceptions.Timeout(
+                'Request timed out. Try increasing the timeout parameter or '
+                'reducing the request size.'
+            )
+
+        time.sleep(poll_interval)
+
+    # Step 3: get the download link for our requested dataset
+    slug = "jobs/" + job_id + "/results"
+    response = requests.get(url + slug, headers=headers, timeout=timeout)
+    response.raise_for_status()
+    results_response = response.json()
+    download_url = results_response['asset']['value']['href']
+
+    # Step 4: finally, download our dataset. it's a zipfile of one CSV
+    response = requests.get(download_url, timeout=timeout)
+    response.raise_for_status()
+    with zipfile.ZipFile(BytesIO(response.content)) as archive:
+        filename = archive.namelist()[0]
+        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
+    df = pd.read_csv(csvbuffer)
+
+    # and parse into the usual formats
+    metadata = submission_response['metadata']  # include messages from ECMWF
+    metadata['jobID'] = job_id
+    if not df.empty:
+        # coordinates as reported in the returned file, which may differ
+        # from the requested ones
+        metadata['latitude'] = df['latitude'].values[0]
+        metadata['longitude'] = df['longitude'].values[0]
+
+    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
+    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])
+
+    if map_variables:
+        # convert units and rename
+        for shortname in df.columns:
+            converter = UNITS.get(shortname, _same)
+            df[shortname] = converter(df[shortname])
+        df = df.rename(columns=VARIABLE_MAP)
+
+    return df, metadata
@@ -130,6 +130,19 @@ def nrel_api_key():
  reason='requires solaranywhere credentials')
 
 
+# ECMWF API key used for testing pvlib.iotools.get_era5. Tests carrying the
+# marker defined below are skipped when the ECMWF_API_KEY environment
+# variable is not set.
+has_ecmwf_credentials = "ECMWF_API_KEY" in os.environ
+if has_ecmwf_credentials:
+    # NOTE(review): "ecwmf" looks like a misspelling of "ecmwf"; the name is
+    # kept unchanged in case anything imports it.
+    ecwmf_api_key = os.environ["ECMWF_API_KEY"]
+
+requires_ecmwf_credentials = pytest.mark.skipif(
+    not has_ecmwf_credentials,
+    reason='requires ECMWF credentials')
+
+
 try:
  import statsmodels # noqa: F401
  has_statsmodels = True
 
@@ -0,0 +1,94 @@
+"""
+tests for pvlib/iotools/era5.py
+"""
+
+import pandas as pd
+import pytest
+import pvlib
+import requests
+import os
+from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials
+
+
+@pytest.fixture
+def params():
+    """Keyword arguments for a one-day, single-location ``get_era5`` call."""
+    request_kwargs = dict(
+        latitude=40.01, longitude=-80.01,
+        start='2020-06-01', end='2020-06-01',
+        variables=['ghi', 'temp_air'],
+        # read at fixture time so the key never has to live in the repository
+        api_key=os.environ["ECMWF_API_KEY"],
+    )
+    return request_kwargs
+
+
+@pytest.fixture
+def expected():
+    """Hourly reference values that the ``get_era5`` tests compare against."""
+    hours = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59", freq="h",
+                          tz="UTC", name='valid_time')
+    columns = {
+        'temp_air': [
+            16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4, 8.5, 8.1,
+            9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9, 21.5, 21.6, 21.,
+            20.7,
+        ],
+        'ghi': [
+            153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
+            427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1, 685.2,
+            526.9, 331.4,
+        ],
+    }
+    return pd.DataFrame(columns, index=hours)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5(params, expected):
+    """Default call: data match the reference and metadata are populated."""
+    data, metadata = pvlib.iotools.get_era5(**params)
+    pd.testing.assert_frame_equal(data, expected, check_freq=False,
+                                  atol=0.1)
+    # metadata carry the coordinates found in the returned file
+    assert metadata['longitude'] == -80.0
+    assert metadata['latitude'] == 40.0
+    assert isinstance(metadata['jobID'], str)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_timezone(params, expected):
+    """Localized ``start``/``end`` values give the same result as naive ones."""
+    for bound in ('start', 'end'):
+        naive = pd.to_datetime(params[bound])
+        params[bound] = naive.tz_localize('Etc/GMT+8')
+    data, metadata = pvlib.iotools.get_era5(**params)
+    pd.testing.assert_frame_equal(data, expected, check_freq=False,
+                                  atol=0.1)
+    assert metadata['longitude'] == -80.0
+    assert metadata['latitude'] == 40.0
+    assert isinstance(metadata['jobID'], str)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_map_variables(params, expected):
+    """With ``map_variables=False`` the ERA5 names and raw units are kept."""
+    data, metadata = pvlib.iotools.get_era5(**params, map_variables=False)
+    era5_names = {'temp_air': 't2m', 'ghi': 'ssrd'}
+    expected = expected.rename(columns=era5_names)
+    # apply unit conversions manually
+    data['t2m'] = data['t2m'] - 273.15
+    data['ssrd'] = data['ssrd'] / 3600
+    pd.testing.assert_frame_equal(data, expected, check_freq=False,
+                                  atol=0.1)
+    assert metadata['longitude'] == -80.0
+    assert metadata['latitude'] == 40.0
+    assert isinstance(metadata['jobID'], str)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_error(params):
+    """Requesting an unknown variable raises the failed-request error."""
+    params['variables'] = ['nonexistent']
+    with pytest.raises(
+            Exception,
+            match='Request failed. Please check the ECMWF website'):
+        pvlib.iotools.get_era5(**params)
+
+
+@requires_ecmwf_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_era5_timeout(params):
+    """A one-second ``timeout`` raises ``requests.exceptions.Timeout``."""
+    with pytest.raises(
+            requests.exceptions.Timeout,
+            match='Request timed out. Try increasing'):
+        pvlib.iotools.get_era5(**params, timeout=1)