From 154394f68fbe20a91f663a4fc87f9d0b3d98ec68 Mon Sep 17 00:00:00 2001
From: Derek Xu <xzrderek@gmail.com>
Date: Fri, 23 Jan 2026 23:15:42 -0800
Subject: [PATCH 1/2] hot fix: retry LiteLLM chat completions on 404 with exponential backoff

---
 eval_protocol/proxy/proxy_core/litellm.py | 24 +++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py
index cdd2383b..1ce1a792 100644
--- a/eval_protocol/proxy/proxy_core/litellm.py
+++ b/eval_protocol/proxy/proxy_core/litellm.py
@@ -4,6 +4,7 @@
 
 import json
 import base64
+import asyncio
 import httpx
 import logging
 from uuid6 import uuid7
@@ -14,6 +15,12 @@
 
 logger = logging.getLogger(__name__)
 
+# Retry configuration for 404 errors
+# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds)
+# Total wait time: ~255 seconds (~4.25 minutes)
+MAX_RETRIES_ON_404 = 8
+RETRY_BASE_DELAY_SECONDS = 1
+
 
 async def handle_chat_completion(
     config: ProxyConfig,
@@ -108,12 +115,29 @@ async def handle_chat_completion(
         # Forward to LiteLLM
         litellm_url = f"{config.litellm_url}/chat/completions"
 
+        # Initial request; if LiteLLM answers 404, the loop below re-sends it
+        # with exponential backoff
         response = await client.post(
             litellm_url,
             json=data,  # httpx will serialize and set correct Content-Length
             headers=headers,
         )
 
+        for attempt in range(MAX_RETRIES_ON_404):
+            if response.status_code != 404:
+                break
+
+            # Wait with exponential backoff before retry
+            delay = RETRY_BASE_DELAY_SECONDS * (2**attempt)
+            logger.warning(f"Got 404 from LiteLLM, retrying in {delay}s (attempt {attempt + 1}/{MAX_RETRIES_ON_404})")
+            await asyncio.sleep(delay)
+
+            response = await client.post(
+                litellm_url,
+                json=data,
+                headers=headers,
+            )
+
         # Register insertion_id in Redis only on successful response
         if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
             register_insertion_id(redis_client, rollout_id, insertion_id)

From 9186f68f46b5822fa4a3bcdadfe29f7124bb7cd4 Mon Sep 17 00:00:00 2001
From: Derek Xu <xzrderek@gmail.com>
Date: Fri, 23 Jan 2026 23:18:29 -0800
Subject: [PATCH 2/2] hot fix: raise MAX_RETRIES_ON_404 from 8 to 9

---
 eval_protocol/proxy/proxy_core/litellm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py
index 1ce1a792..8ff4d013 100644
--- a/eval_protocol/proxy/proxy_core/litellm.py
+++ b/eval_protocol/proxy/proxy_core/litellm.py
@@ -18,7 +18,7 @@
 # Retry configuration for 404 errors
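-# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds)
-# Total wait time: ~255 seconds (~4.25 minutes)
-MAX_RETRIES_ON_404 = 8
+# 9 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128, 256 seconds)
+# Total wait time: ~511 seconds (~8.5 minutes)
+MAX_RETRIES_ON_404 = 9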
 RETRY_BASE_DELAY_SECONDS = 1