diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py
index cdd2383b..8ff4d013 100644
--- a/eval_protocol/proxy/proxy_core/litellm.py
+++ b/eval_protocol/proxy/proxy_core/litellm.py
@@ -4,6 +4,7 @@
 import json
 import base64
+import asyncio
 import httpx
 import logging
 from uuid6 import uuid7
@@ -14,6 +15,12 @@
 
 logger = logging.getLogger(__name__)
 
+# Retry configuration for 404 errors
+# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds)
+# Total wait time: ~255 seconds (~4.25 minutes)
+MAX_RETRIES_ON_404 = 8
+RETRY_BASE_DELAY_SECONDS = 1
+
 
 async def handle_chat_completion(
     config: ProxyConfig,
@@ -108,12 +115,29 @@
     # Forward to LiteLLM
     litellm_url = f"{config.litellm_url}/chat/completions"
 
+    # Retry loop with exponential backoff for 404 errors
+    # Initial request
     response = await client.post(
         litellm_url,
         json=data,  # httpx will serialize and set correct Content-Length
         headers=headers,
     )
 
+    for attempt in range(MAX_RETRIES_ON_404):
+        if response.status_code != 404:
+            break
+
+        # Wait with exponential backoff before retry
+        delay = RETRY_BASE_DELAY_SECONDS * (2**attempt)
+        logger.warning(f"Got 404 from LiteLLM, retrying in {delay}s (attempt {attempt + 1}/{MAX_RETRIES_ON_404})")
+        await asyncio.sleep(delay)
+
+        response = await client.post(
+            litellm_url,
+            json=data,
+            headers=headers,
+        )
+
     # Register insertion_id in Redis only on successful response
     if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
         register_insertion_id(redis_client, rollout_id, insertion_id)