Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions eval_protocol/proxy/proxy_core/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import base64
import asyncio
import httpx
import logging
from uuid6 import uuid7
Expand All @@ -14,6 +15,12 @@

logger = logging.getLogger(__name__)

# Retry configuration for 404 errors
# Up to 9 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128, 256 seconds)
# Total worst-case wait time: ~511 seconds (~8.5 minutes)
MAX_RETRIES_ON_404 = 9
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retry count mismatch: 9 retries instead of documented 8

Medium Severity

The comment states "8 retries" with delays of "1, 2, 4, 8, 16, 32, 64, 128 seconds" and total wait time of "~255 seconds", but MAX_RETRIES_ON_404 = 9 causes 9 retries. The 9th retry adds a 256-second delay, nearly doubling total wait time to ~511 seconds (~8.5 minutes). This could cause users to wait twice as long as intended when hitting persistent 404 errors.

Additional Locations (1)

Fix in Cursor Fix in Web

RETRY_BASE_DELAY_SECONDS = 1


async def handle_chat_completion(
config: ProxyConfig,
Expand Down Expand Up @@ -108,12 +115,29 @@ async def handle_chat_completion(
# Forward to LiteLLM
litellm_url = f"{config.litellm_url}/chat/completions"

# Initial request; the loop below retries it with exponential backoff
# for as long as LiteLLM responds with 404 (up to MAX_RETRIES_ON_404 times)
response = await client.post(
litellm_url,
json=data, # httpx will serialize and set correct Content-Length
headers=headers,
)

for attempt in range(MAX_RETRIES_ON_404):
if response.status_code != 404:
break

# Wait with exponential backoff before retry
delay = RETRY_BASE_DELAY_SECONDS * (2**attempt)
logger.warning(f"Got 404 from LiteLLM, retrying in {delay}s (attempt {attempt + 1}/{MAX_RETRIES_ON_404})")
await asyncio.sleep(delay)

response = await client.post(
litellm_url,
json=data,
headers=headers,
)

# Register insertion_id in Redis only on successful response
if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
register_insertion_id(redis_client, rollout_id, insertion_id)
Expand Down
Loading