- Notifications
You must be signed in to change notification settings - Fork 344
Refactored retry config into _retry.py and added support for exponential backoff and Retry-After header #871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
4 commits Select commit Hold shift + click to select a range
4c3a6d9 Refactored retry config to `_retry.py` and added support for backoff …
jonathanedey 1c4c844 Added unit tests for `_retry.py`
jonathanedey b3aba37 Updated unit tests for HTTPX request errors
jonathanedey 0b976cb Address review comments
jonathanedey File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Refactored retry config to
_retry.py and added support for backoff … …and Retry-After
- Loading branch information
commit 4c3a6d9db06a2548c46cb8d975b1691925c3e775
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,246 @@ | ||
| # Copyright 2025 Google Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| | ||
| """Internal retry logic module | ||
| | ||
| This module provides utilities for adding retry logic to HTTPX requests | ||
| """ | ||
| | ||
| from __future__ import annotations | ||
| import copy | ||
| import email.utils | ||
| import random | ||
| import re | ||
| import time | ||
| from types import CoroutineType | ||
| from typing import Any, Callable, List, Optional, Tuple | ||
| import logging | ||
| import asyncio | ||
| import httpx | ||
| | ||
| logger = logging.getLogger(__name__) | ||
| | ||
| | ||
class HttpxRetry:
    """HTTPX-based retry configuration.

    Tracks how many retries remain, decides which responses are retryable,
    and computes how long to wait (exponential backoff with optional jitter,
    or a response's ``Retry-After`` header) before the next attempt.
    """

    # TODO: Decide
    # urllib3.Retry ignores the status_forcelist and only respects the Retry-After
    # header for 413, 429 and 503 errors. Should we do the same?
    # Default status codes for which a ``Retry-After`` header is honored.
    RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])

    #: Default maximum backoff time (in seconds).
    DEFAULT_BACKOFF_MAX = 120

    def __init__(
            self,
            status: int = 10,
            status_forcelist: Optional[List[int]] = None,
            backoff_factor: float = 0,
            backoff_max: float = DEFAULT_BACKOFF_MAX,
            raise_on_status: bool = False,
            backoff_jitter: float = 0,
            history: Optional[List[Tuple[
                httpx.Request,
                Optional[httpx.Response],
                Optional[Exception]
            ]]] = None,
            respect_retry_after_header: bool = False,
    ) -> None:
        """Initializes the retry configuration.

        Args:
            status: Number of status-based retries still allowed.
            status_forcelist: Status codes that always trigger a retry.
            backoff_factor: Base factor for exponential backoff.
            backoff_max: Upper bound (in seconds) on the computed backoff.
            raise_on_status: Whether to raise once retries are exhausted.
            backoff_jitter: Maximum random jitter (in seconds) added to backoff.
            history: Prior ``(request, response, error)`` attempt tuples.
            respect_retry_after_header: Whether to honor ``Retry-After`` headers.
        """
        self.status = status
        self.status_forcelist = status_forcelist
        self.backoff_factor = backoff_factor
        self.backoff_max = backoff_max
        self.raise_on_status = raise_on_status
        self.backoff_jitter = backoff_jitter
        # One (request, response, error) entry per previous attempt.
        self.history = history if history else []
        self.respect_retry_after_header = respect_retry_after_header

    def copy(self) -> HttpxRetry:
        """Creates a deep copy of this instance."""
        return copy.deepcopy(self)

    def is_retryable_response(self, response: httpx.Response) -> bool:
        """Determine if a response implies that the request should be retried if possible."""
        if self.status_forcelist and response.status_code in self.status_forcelist:
            return True

        has_retry_after = bool(response.headers.get('Retry-After'))
        if (
                self.respect_retry_after_header
                and has_retry_after
                and response.status_code in self.RETRY_AFTER_STATUS_CODES
        ):
            return True

        return False

    # Placeholder for exception retrying
    def is_retryable_error(self, error: Exception) -> bool:
        """Determine if the error implies that the request should be retried if possible."""
        logger.debug(error)
        return False

    def is_exhausted(self) -> bool:
        """Determine if there are any more retries left."""
        # The status counter goes negative once retries are used up.
        return self.status < 0

    # Identical implementation of `urllib3.Retry.parse_retry_after()`
    def _parse_retry_after(self, retry_after_header: str) -> float | None:
        """Parses a Retry-After header value into a number of seconds.

        Supports both delta-seconds (e.g. ``"120"``) and HTTP-date formats.

        Raises:
            httpx.RemoteProtocolError: If the header is neither an integer
                nor a parsable HTTP date.
        """
        seconds: float
        # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
        if re.match(r"^\s*[0-9]+\s*$", retry_after_header):
            seconds = int(retry_after_header)
        else:
            retry_date_tuple = email.utils.parsedate_tz(retry_after_header)
            if retry_date_tuple is None:
                # TODO: Verify if this is the appropriate way to handle this.
                raise httpx.RemoteProtocolError(f"Invalid Retry-After header: {retry_after_header}")

            retry_date = email.utils.mktime_tz(retry_date_tuple)
            seconds = retry_date - time.time()

        # HTTP dates in the past yield negative deltas; never wait a negative time.
        seconds = max(seconds, 0)

        return seconds

    def get_retry_after(self, response: httpx.Response) -> float | None:
        """Determine the Retry-After time needed before sending the next request."""
        # Fix: the header name is `Retry-After` (hyphen), not `Retry_After`.
        retry_after_header = response.headers.get('Retry-After', None)
        if retry_after_header:
            # Convert retry header to a float in seconds
            return self._parse_retry_after(retry_after_header)
        return None

    def get_backoff_time(self) -> float:
        """Determine the backoff time needed before sending the next request."""
        # request_count is the number of previous request attempts
        request_count = len(self.history)
        # Exponential backoff: factor * 2^(attempts - 1), plus optional jitter,
        # clamped to [0, backoff_max].
        backoff = self.backoff_factor * (2 ** (request_count - 1))
        if self.backoff_jitter:
            backoff += random.random() * self.backoff_jitter
        return float(max(0, min(self.backoff_max, backoff)))

    async def sleep_for_backoff(self) -> None:
        """Determine and wait the backoff time needed before sending the next request."""
        backoff = self.get_backoff_time()
        logger.debug('Sleeping for %f seconds following failed request', backoff)
        await asyncio.sleep(backoff)

    async def sleep(self, response: httpx.Response) -> None:
        """Determine and wait the time needed before sending the next request."""
        if self.respect_retry_after_header:
            retry_after = self.get_retry_after(response)
            if retry_after:
                await asyncio.sleep(retry_after)
                return
        await self.sleep_for_backoff()

    def increment(
            self,
            request: httpx.Request,
            response: Optional[httpx.Response] = None,
            error: Optional[Exception] = None
    ) -> None:
        """Update the retry state based on a request attempt.

        Only status-based retries are counted; errors are recorded in history
        but do not currently consume a retry (see the is_retryable_error
        placeholder above).
        """
        if response and self.is_retryable_response(response):
            self.status -= 1
        self.history.append((request, response, error))
| | ||
| | ||
# Note - This implementation currently covers:
# - basic retries for pre-defined status errors
# - applying retry backoff and backoff jitter
# - ability to respect a response's retry-after header
class HttpxRetryTransport(httpx.AsyncBaseTransport):
    """HTTPX transport with retry logic.

    Wraps a fully featured ``httpx.AsyncHTTPTransport`` and re-dispatches
    requests whose responses the configured ``HttpxRetry`` deems retryable,
    sleeping between attempts as the policy dictates.
    """

    # Default policy: retry 500/503 up to 4 times with exponential backoff.
    DEFAULT_RETRY = HttpxRetry(status=4, status_forcelist=[500, 503], backoff_factor=0.5)

    # We could also support passing kwargs here
    def __init__(self, retry: HttpxRetry = DEFAULT_RETRY, **kwargs) -> None:
        self._retry = retry

        transport_kwargs = kwargs.copy()
        # Disable the wrapped transport's built-in connect retries and enable
        # HTTP/2; all retry handling happens in this class.
        transport_kwargs.update({'retries': 0, 'http2': True})
        # We should use a full AsyncHTTPTransport under the hood since that is
        # fully implemented. We could consider making this class extend an
        # AsyncHTTPTransport instead and use the parent class's methods to handle
        # requests. We should also ensure that that transport's internal retry is
        # not enabled.
        self._wrapped_transport = httpx.AsyncHTTPTransport(**transport_kwargs)

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        return await self._dispatch_with_retry(
            request, self._wrapped_transport.handle_async_request)

    # Two types of retries
    # - Status code (500s, redirect)
    # - Error code (read, connect, other)
    async def _dispatch_with_retry(
            self,
            request: httpx.Request,
            dispatch_method: Callable[[httpx.Request], CoroutineType[Any, Any, httpx.Response]]
    ) -> httpx.Response:
        """Sends a request with retry logic using a provided dispatch method.

        Raises:
            httpx.HTTPError: The last error received, if it is not retryable
                or retries are exhausted.
        """
        # This retry config is shared across all requests that use this transport
        # and therefore must be copied so state is tracked per request and its
        # retries only.
        retry = self._retry.copy()
        # First request
        response, error = None, None

        while not retry.is_exhausted():

            # A previous attempt produced a retryable response: wait first.
            if response:
                await retry.sleep(response)

            # Need to reset here so only last attempt's error or response is saved.
            response, error = None, None

            try:
                logger.debug('Sending request in _dispatch_with_retry(): %r', request)
                response = await dispatch_method(request)
                logger.debug('Received response: %r', response)
            except httpx.HTTPError as err:
                logger.debug('Received error: %r', err)
                error = err

            if response and not retry.is_retryable_response(response):
                return response

            if error and not retry.is_retryable_error(error):
                raise error

            retry.increment(request, response)

        # Retries exhausted: surface whatever the last attempt produced.
        if response:
            return response
        if error:
            raise error
        raise Exception('_dispatch_with_retry() ended with no response or exception')

    async def aclose(self) -> None:
        """Closes the underlying wrapped transport."""
        await self._wrapped_transport.aclose()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to call this `max_retries` instead?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the current state, yes, because status retries are the only retries we have. This was more of a future-proof decision for when we add error retries, where we would have total, status, and error counters.
Similar to the placeholder comment, I think we can use
`max_retries` (and `retries_left` internally, since we decrease this value) and add the other options later.