From 154394f68fbe20a91f663a4fc87f9d0b3d98ec68 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Fri, 23 Jan 2026 23:15:42 -0800 Subject: [PATCH 1/2] hot fix --- eval_protocol/proxy/proxy_core/litellm.py | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index cdd2383b..1ce1a792 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -4,6 +4,7 @@ import json import base64 +import asyncio import httpx import logging from uuid6 import uuid7 @@ -14,6 +15,12 @@ logger = logging.getLogger(__name__) +# Retry configuration for 404 errors +# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds) +# Total wait time: ~255 seconds (~4.25 minutes) +MAX_RETRIES_ON_404 = 8 +RETRY_BASE_DELAY_SECONDS = 1 + async def handle_chat_completion( config: ProxyConfig, @@ -108,12 +115,29 @@ async def handle_chat_completion( # Forward to LiteLLM litellm_url = f"{config.litellm_url}/chat/completions" + # Retry loop with exponential backoff for 404 errors + # Initial request response = await client.post( litellm_url, json=data, # httpx will serialize and set correct Content-Length headers=headers, ) + for attempt in range(MAX_RETRIES_ON_404): + if response.status_code != 404: + break + + # Wait with exponential backoff before retry + delay = RETRY_BASE_DELAY_SECONDS * (2**attempt) + logger.warning(f"Got 404 from LiteLLM, retrying in {delay}s (attempt {attempt + 1}/{MAX_RETRIES_ON_404})") + await asyncio.sleep(delay) + + response = await client.post( + litellm_url, + json=data, + headers=headers, + ) + # Register insertion_id in Redis only on successful response if response.status_code == 200 and insertion_id is not None and rollout_id is not None: register_insertion_id(redis_client, rollout_id, insertion_id) From 9186f68f46b5822fa4a3bcdadfe29f7124bb7cd4 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Fri, 23 Jan 2026 
23:18:29 -0800 Subject: [PATCH 2/2] hot fix --- eval_protocol/proxy/proxy_core/litellm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 1ce1a792..8ff4d013 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -18,7 +18,7 @@ # Retry configuration for 404 errors -# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds) -# Total wait time: ~255 seconds (~4.25 minutes) -MAX_RETRIES_ON_404 = 8 +# 9 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128, 256 seconds) +# Total wait time: ~511 seconds (~8.5 minutes) +MAX_RETRIES_ON_404 = 9 RETRY_BASE_DELAY_SECONDS = 1