From 3be3dcc66746de827eec4a10e06623e91bf46d4f Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 16:35:21 -0800 Subject: [PATCH 01/12] use litellm sdk --- eval_protocol/proxy/Dockerfile.gateway | 4 +- eval_protocol/proxy/README.md | 18 +-- eval_protocol/proxy/config_no_cache.yaml | 3 +- eval_protocol/proxy/docker-compose.yml | 31 +---- eval_protocol/proxy/proxy_core/app.py | 20 +-- eval_protocol/proxy/proxy_core/litellm.py | 159 +++++++++------------- eval_protocol/proxy/proxy_core/models.py | 1 - eval_protocol/proxy/requirements.txt | 4 + 8 files changed, 89 insertions(+), 151 deletions(-) diff --git a/eval_protocol/proxy/Dockerfile.gateway b/eval_protocol/proxy/Dockerfile.gateway index a9308faa..663d3142 100644 --- a/eval_protocol/proxy/Dockerfile.gateway +++ b/eval_protocol/proxy/Dockerfile.gateway @@ -1,4 +1,4 @@ -# Metadata Extraction Gateway - Sits in front of LiteLLM +# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL FROM python:3.11-slim WORKDIR /app @@ -19,5 +19,5 @@ COPY ./proxy_core /app/proxy_core EXPOSE 4000 # Run the gateway as a module -# LITELLM_URL will be set by environment (docker-compose or Cloud Run) +# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run) CMD ["python", "-m", "proxy_core.main"] diff --git a/eval_protocol/proxy/README.md b/eval_protocol/proxy/README.md index ffcdaf25..9223bba2 100644 --- a/eval_protocol/proxy/README.md +++ b/eval_protocol/proxy/README.md @@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon - Stores insertion IDs per rollout for completeness checking - Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}` -#### 3. **LiteLLM Backend** - - Standard LiteLLM proxy for routing to LLM providers - - Configured with Langfuse callbacks for automatic tracing +#### 3. **LiteLLM SDK (Direct)** + - Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed) + - Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback ## Key Features @@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection. 
| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
-| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
| `REDIS_HOST` | Yes | - | Redis hostname |
| `REDIS_PORT` | No | 6379 | Redis port |
| `REDIS_PASSWORD` | No | - | Redis password |
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
-| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
+| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse OTEL host for tracing |
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
| `LOG_LEVEL` | No | INFO | Logging level |
| `PORT` | No | 4000 | Gateway port |
@@ -272,15 +271,14 @@ default_project_id: project-1

### LiteLLM Configuration

-The `config_no_cache.yaml` configures LiteLLM:
+The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):

```yaml
model_list:
  - model_name: "*"
    litellm_params:
      model: "*"
litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
  drop_params: True
general_settings:
  allow_client_side_credentials: true
@@ -288,9 +286,11 @@ general_settings:

Key settings:
- **Wildcard model support**: Route any model to any provider
-- **Langfuse callbacks**: Automatic tracing on success/failure
+- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
- **Client-side credentials**: Accept API keys from request body

+**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
+
## Security Considerations

### Authentication
diff --git a/eval_protocol/proxy/config_no_cache.yaml b/eval_protocol/proxy/config_no_cache.yaml
index 7adb5a72..1d772705 100644
--- a/eval_protocol/proxy/config_no_cache.yaml
+++ b/eval_protocol/proxy/config_no_cache.yaml
@@ -3,8 +3,7 @@ model_list:
     litellm_params:
       model: "*"
 litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
diff --git a/eval_protocol/proxy/docker-compose.yml b/eval_protocol/proxy/docker-compose.yml
index a6058e0e..10659634 100644
--- a/eval_protocol/proxy/docker-compose.yml
+++ b/eval_protocol/proxy/docker-compose.yml
@@ -7,41 +7,19 @@ services:
     ports:
       - "6379:6379" # Expose for debugging if needed
     networks:
-      - litellm-network
+      - proxy-network
     restart: unless-stopped
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data

-  # LiteLLM Backend - Handles actual LLM proxying
-  litellm-backend:
-    image: litellm/litellm:v1.77.3-stable
-    platform: linux/amd64
-    container_name: litellm-backend
-    command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
-    # If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in the .env file.
- env_file: - - .env # Load API keys from .env file - environment: - - LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy - - LANGFUSE_SECRET_KEY=dummy - volumes: - - ./config_no_cache.yaml:/app/config.yaml:ro - ports: - - "4001:4000" # Expose on 4001 for direct access if needed - networks: - - litellm-network - restart: unless-stopped - - # Metadata Gateway - Public-facing service that extracts metadata from URLs + # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL metadata-gateway: build: context: . dockerfile: Dockerfile.gateway container_name: metadata-gateway environment: - # Point to the LiteLLM backend service - - LITELLM_URL=http://litellm-backend:4000 - PORT=4000 # Redis configuration for assistant message counting - REDIS_HOST=redis @@ -56,14 +34,13 @@ services: ports: - "4000:4000" # Main public-facing port networks: - - litellm-network + - proxy-network depends_on: - - litellm-backend - redis restart: unless-stopped networks: - litellm-network: + proxy-network: driver: bridge volumes: diff --git a/eval_protocol/proxy/proxy_core/app.py b/eval_protocol/proxy/proxy_core/app.py index 751d5dc1..ab1bfde5 100644 --- a/eval_protocol/proxy/proxy_core/app.py +++ b/eval_protocol/proxy/proxy_core/app.py @@ -15,7 +15,7 @@ from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook from .auth import AuthProvider, NoAuthProvider -from .litellm import handle_chat_completion, proxy_to_litellm +from .litellm import handle_chat_completion from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace # Configure logging before any other imports (so all modules inherit this config) @@ -35,10 +35,6 @@ def build_proxy_config( preprocess_traces_request: Optional[TracesRequestHook] = None, ) -> ProxyConfig: """Load environment and secrets, and build ProxyConfig""" - # Env - litellm_url = os.getenv("LITELLM_URL") - if not litellm_url: - raise ValueError("LITELLM_URL environment variable must be set") request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0")) langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com") @@ -66,7 +62,6 @@ def build_proxy_config( raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}") return ProxyConfig( - litellm_url=litellm_url, request_timeout=request_timeout, langfuse_host=langfuse_host, langfuse_keys=langfuse_keys, @@ -113,6 +108,10 @@ async def lifespan(app: FastAPI): app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request) app.state.redis = init_redis() + import litellm + + litellm.callbacks = ["langfuse_otel"] + try: yield finally: @@ -297,13 +296,4 @@ async def pointwise_get_langfuse_trace( async def health(): return {"status": "healthy", "service": "metadata-proxy"} - # Catch-all - @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) - async def catch_all_proxy( - path: str, - request: Request, - config: ProxyConfig = Depends(get_config), - ): - return await proxy_to_litellm(config, path, request) - return app diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index cdd2383b..b3b5684f 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -1,20 +1,36 @@ """ -LiteLLM client - handles all communication with LiteLLM service. 
+LiteLLM client - handles all LLM calls directly via LiteLLM SDK with Langfuse OTEL integration. """ import json import base64 -import httpx import logging +import os from uuid6 import uuid7 from fastapi import Request, Response, HTTPException import redis +from litellm import acompletion + from .redis_utils import register_insertion_id from .models import ProxyConfig, ChatParams logger = logging.getLogger(__name__) +def _configure_langfuse_otel(config: ProxyConfig, project_id: str) -> None: + """Configure Langfuse OTEL credentials via environment variables.""" + public_key = config.langfuse_keys[project_id]["public_key"] + secret_key = config.langfuse_keys[project_id]["secret_key"] + + os.environ["LANGFUSE_PUBLIC_KEY"] = public_key + os.environ["LANGFUSE_SECRET_KEY"] = secret_key + os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) + + logger.info( + f"Langfuse OTEL configured: project={project_id}, host={os.environ['LANGFUSE_HOST']}, public_key={public_key[:20]}..." + ) + + async def handle_chat_completion( config: ProxyConfig, redis_client: redis.Redis, @@ -22,12 +38,12 @@ async def handle_chat_completion( params: ChatParams, ) -> Response: """ - Handle chat completion requests and forward to LiteLLM. + Handle chat completion requests using LiteLLM SDK directly with Langfuse OTEL. If metadata IDs (rollout_id, etc.) are provided, they'll be added as tags and the assistant message count will be tracked in Redis. - If encoded_base_url is provided, it will be decoded and added to the request. + If encoded_base_url is provided, it will be decoded and used as api_base. """ body = await request.body() data = json.loads(body) if body else {} @@ -50,36 +66,26 @@ async def handle_chat_completion( # Decode and add base_url if provided if encoded_base_url: try: - # Decode from URL-safe base64 decoded_bytes = base64.urlsafe_b64decode(encoded_base_url) - base_url = decoded_bytes.decode("utf-8") - data["base_url"] = base_url - logger.debug(f"Decoded base_url: {base_url}") + data["base_url"] = decoded_bytes.decode("utf-8") + logger.debug(f"Decoded base_url: {data['base_url']}") except Exception as e: logger.error(f"Failed to decode base_url: {e}") raise HTTPException(status_code=400, detail=f"Invalid encoded_base_url: {str(e)}") - # Extract API key from Authorization header and inject into request body + # Extract API key from Authorization header and add to data auth_header = request.headers.get("authorization", "") if auth_header.startswith("Bearer "): - api_key = auth_header.replace("Bearer ", "").strip() - # Only inject API key if model is a Fireworks model - model = data.get("model") - if model and isinstance(model, str) and model.startswith("fireworks_ai"): - data["api_key"] = api_key + data["api_key"] = auth_header.replace("Bearer ", "").strip() - # If metadata IDs are provided, add them as tags + # Build metadata with tags for Langfuse OTEL insertion_id = None + metadata = data.pop("metadata", {}) or {} + tags = list(metadata.pop("tags", []) or []) + if rollout_id is not None: insertion_id = str(uuid7()) - - if "metadata" not in data: - data["metadata"] = {} - if "tags" not in data["metadata"]: - data["metadata"]["tags"] = [] - - # Add extracted IDs as tags - data["metadata"]["tags"].extend( + tags.extend( [ f"rollout_id:{rollout_id}", f"insertion_id:{insertion_id}", @@ -90,84 +96,47 @@ async def handle_chat_completion( ] ) - # Add Langfuse configuration - data["langfuse_public_key"] = config.langfuse_keys[project_id]["public_key"] - data["langfuse_secret_key"] = 
config.langfuse_keys[project_id]["secret_key"] - data["langfuse_host"] = config.langfuse_host - - # Forward to LiteLLM's standard /chat/completions endpoint - # Set longer timeout for LLM API calls (LLMs can be slow) - timeout = httpx.Timeout(config.request_timeout) - async with httpx.AsyncClient(timeout=timeout) as client: - # Copy headers from original request but exclude content-length (httpx will set it correctly) - headers = dict(request.headers) - headers.pop("host", None) - headers.pop("content-length", None) # Let httpx calculate the correct length - headers["content-type"] = "application/json" - - # Forward to LiteLLM - litellm_url = f"{config.litellm_url}/chat/completions" - - response = await client.post( - litellm_url, - json=data, # httpx will serialize and set correct Content-Length - headers=headers, + # Configure Langfuse OTEL + _configure_langfuse_otel(config, project_id) + + # Build Langfuse OTEL metadata (becomes span attributes prefixed with langfuse.*) + litellm_metadata = {"tags": tags, **metadata} + if rollout_id is not None: + litellm_metadata["trace_id"] = rollout_id + litellm_metadata["generation_name"] = f"chat-{insertion_id}" + + try: + # Make the completion call - pass all params through + response = await acompletion( + **data, + metadata=litellm_metadata, + timeout=config.request_timeout, ) - # Register insertion_id in Redis only on successful response - if response.status_code == 200 and insertion_id is not None and rollout_id is not None: + # Register insertion_id in Redis on success + if insertion_id is not None and rollout_id is not None: register_insertion_id(redis_client, rollout_id, insertion_id) - # Return the response + # Convert ModelResponse to JSON return Response( - content=response.content, - status_code=response.status_code, - headers=dict(response.headers), - ) - - -async def proxy_to_litellm(config: ProxyConfig, path: str, request: Request) -> Response: - """ - Catch-all proxy: Forward any request to LiteLLM, extracting API key from Authorization header. 
- """ - # Set longer timeout for LLM API calls (LLMs can be slow) - timeout = httpx.Timeout(config.request_timeout) - async with httpx.AsyncClient(timeout=timeout) as client: - # Copy headers - headers = dict(request.headers) - headers.pop("host", None) - headers.pop("content-length", None) - - # Get body - body = await request.body() - - # Pass through API key from Authorization header - if request.method in ["POST", "PUT", "PATCH"] and body: - try: - data = json.loads(body) - - auth_header = request.headers.get("authorization", "") - if auth_header.startswith("Bearer "): - api_key = auth_header.replace("Bearer ", "").strip() - data["api_key"] = api_key - - # Re-serialize - body = json.dumps(data).encode() - except json.JSONDecodeError: - pass - - # Forward to LiteLLM - litellm_url = f"{config.litellm_url}/{path}" - - response = await client.request( - method=request.method, - url=litellm_url, - headers=headers, - content=body, + content=response.model_dump_json(), + status_code=200, + media_type="application/json", ) + except HTTPException: + raise + except Exception as e: + logger.error(f"LiteLLM error: {e}", exc_info=True) return Response( - content=response.content, - status_code=response.status_code, - headers=dict(response.headers), + content=json.dumps( + { + "error": { + "message": str(e), + "type": type(e).__name__, + } + } + ), + status_code=500, + media_type="application/json", ) diff --git a/eval_protocol/proxy/proxy_core/models.py b/eval_protocol/proxy/proxy_core/models.py index f3b5e614..bf60cf71 100644 --- a/eval_protocol/proxy/proxy_core/models.py +++ b/eval_protocol/proxy/proxy_core/models.py @@ -53,7 +53,6 @@ class TracesParams(BaseModel): class ProxyConfig(BaseModel): """Configuration model for the LiteLLM Metadata Proxy""" - litellm_url: str request_timeout: float = 300.0 langfuse_host: str langfuse_keys: Dict[str, Dict[str, str]] diff --git a/eval_protocol/proxy/requirements.txt b/eval_protocol/proxy/requirements.txt index 15d21d0b..b395bdb0 100644 --- a/eval_protocol/proxy/requirements.txt +++ b/eval_protocol/proxy/requirements.txt @@ -5,3 +5,7 @@ redis>=5.0.0 langfuse>=2.0.0 uuid6>=2025.0.0 PyYAML>=6.0.0 +litellm>=1.77.0 +opentelemetry-api>=1.20.0 +opentelemetry-sdk>=1.20.0 +opentelemetry-exporter-otlp>=1.20.0 From 68bfdc67eaa5eb5b86f9dfd9e2a5bf465546446e Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:17:25 -0800 Subject: [PATCH 02/12] address comments --- eval_protocol/proxy/proxy_core/litellm.py | 80 ++++++++++++----------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index b3b5684f..98334861 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -5,10 +5,11 @@ import json import base64 import logging -import os from uuid6 import uuid7 from fastapi import Request, Response, HTTPException +from fastapi.responses import StreamingResponse import redis +import openai from litellm import acompletion from .redis_utils import register_insertion_id @@ -17,20 +18,6 @@ logger = logging.getLogger(__name__) -def _configure_langfuse_otel(config: ProxyConfig, project_id: str) -> None: - """Configure Langfuse OTEL credentials via environment variables.""" - public_key = config.langfuse_keys[project_id]["public_key"] - secret_key = config.langfuse_keys[project_id]["secret_key"] - - os.environ["LANGFUSE_PUBLIC_KEY"] = public_key - os.environ["LANGFUSE_SECRET_KEY"] = secret_key - 
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) - - logger.info( - f"Langfuse OTEL configured: project={project_id}, host={os.environ['LANGFUSE_HOST']}, public_key={public_key[:20]}..." - ) - - async def handle_chat_completion( config: ProxyConfig, redis_client: redis.Redis, @@ -78,7 +65,7 @@ async def handle_chat_completion( if auth_header.startswith("Bearer "): data["api_key"] = auth_header.replace("Bearer ", "").strip() - # Build metadata with tags for Langfuse OTEL + # Build metadata with tags for Langfuse insertion_id = None metadata = data.pop("metadata", {}) or {} tags = list(metadata.pop("tags", []) or []) @@ -96,47 +83,62 @@ async def handle_chat_completion( ] ) - # Configure Langfuse OTEL - _configure_langfuse_otel(config, project_id) - - # Build Langfuse OTEL metadata (becomes span attributes prefixed with langfuse.*) + # Build Langfuse metadata (tags, trace context) litellm_metadata = {"tags": tags, **metadata} if rollout_id is not None: litellm_metadata["trace_id"] = rollout_id litellm_metadata["generation_name"] = f"chat-{insertion_id}" + langfuse_keys = config.langfuse_keys[project_id] + + # Check if streaming is requested + is_streaming = data.get("stream", False) + try: # Make the completion call - pass all params through response = await acompletion( **data, metadata=litellm_metadata, timeout=config.request_timeout, + langfuse_public_key=langfuse_keys["public_key"], + langfuse_secret_key=langfuse_keys["secret_key"], ) # Register insertion_id in Redis on success if insertion_id is not None and rollout_id is not None: register_insertion_id(redis_client, rollout_id, insertion_id) - # Convert ModelResponse to JSON - return Response( - content=response.model_dump_json(), - status_code=200, - media_type="application/json", - ) + if is_streaming: + # For streaming, return a StreamingResponse with SSE format + async def stream_generator(): + async for chunk in response: # type: ignore[union-attr] + yield f"data: {chunk.model_dump_json()}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse( + stream_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + else: + # Non-streaming: return JSON response + return Response( + content=response.model_dump_json(), + status_code=200, + media_type="application/json", + ) except HTTPException: raise - except Exception as e: - logger.error(f"LiteLLM error: {e}", exc_info=True) - return Response( - content=json.dumps( - { - "error": { - "message": str(e), - "type": type(e).__name__, - } - } - ), - status_code=500, - media_type="application/json", + except openai.APIError as e: + # Convert to HTTPException and let FastAPI handle it + raise HTTPException( + status_code=getattr(e, "status_code", 500), + detail=str(e), ) + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) From 7ff50dbbdb87ce47f867038364c4040fffebf3a6 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:20:05 -0800 Subject: [PATCH 03/12] another fix --- eval_protocol/proxy/proxy_core/litellm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 98334861..7ea619db 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -94,12 +94,15 @@ async def handle_chat_completion( # Check if streaming is requested is_streaming = data.get("stream", 
False) + # Pop timeout to avoid duplicate kwarg - use client's if provided, else config default + request_timeout = data.pop("timeout", None) or config.request_timeout + try: # Make the completion call - pass all params through response = await acompletion( **data, metadata=litellm_metadata, - timeout=config.request_timeout, + timeout=request_timeout, langfuse_public_key=langfuse_keys["public_key"], langfuse_secret_key=langfuse_keys["secret_key"], ) From c6b0a36b18b9059d87f2117c8008a33337b7abc0 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:36:18 -0800 Subject: [PATCH 04/12] unify packages and also set keys when initializing --- eval_protocol/proxy/Dockerfile.gateway | 14 ++++++++------ eval_protocol/proxy/docker-compose.yml | 4 ++-- eval_protocol/proxy/proxy_core/app.py | 6 ++++++ eval_protocol/proxy/requirements.txt | 11 ----------- pyproject.toml | 3 +++ 5 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 eval_protocol/proxy/requirements.txt diff --git a/eval_protocol/proxy/Dockerfile.gateway b/eval_protocol/proxy/Dockerfile.gateway index 663d3142..7fc41f46 100644 --- a/eval_protocol/proxy/Dockerfile.gateway +++ b/eval_protocol/proxy/Dockerfile.gateway @@ -6,14 +6,16 @@ WORKDIR /app # Prevent Python from buffering stdout/stderr ENV PYTHONUNBUFFERED=1 -# Copy requirements file -COPY ./requirements.txt /app/requirements.txt +# Copy the entire package for local install (context is repo root) +COPY pyproject.toml /app/pyproject.toml +COPY eval_protocol /app/eval_protocol +COPY README.md /app/README.md -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt +# Install from local source with proxy extras +RUN pip install --no-cache-dir ".[proxy]" -# Copy the proxy package -COPY ./proxy_core /app/proxy_core +# Copy the proxy package (local overrides for main.py, auth.py, etc.) +COPY eval_protocol/proxy/proxy_core /app/proxy_core # Expose port EXPOSE 4000 diff --git a/eval_protocol/proxy/docker-compose.yml b/eval_protocol/proxy/docker-compose.yml index 10659634..0983e2eb 100644 --- a/eval_protocol/proxy/docker-compose.yml +++ b/eval_protocol/proxy/docker-compose.yml @@ -16,8 +16,8 @@ services: # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL metadata-gateway: build: - context: . - dockerfile: Dockerfile.gateway + context: ../.. 
+ dockerfile: eval_protocol/proxy/Dockerfile.gateway container_name: metadata-gateway environment: - PORT=4000 diff --git a/eval_protocol/proxy/proxy_core/app.py b/eval_protocol/proxy/proxy_core/app.py index ab1bfde5..633df539 100644 --- a/eval_protocol/proxy/proxy_core/app.py +++ b/eval_protocol/proxy/proxy_core/app.py @@ -108,6 +108,12 @@ async def lifespan(app: FastAPI): app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request) app.state.redis = init_redis() + config = app.state.config + default_keys = config.langfuse_keys[config.default_project_id] + os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"] + os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"] + os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) + import litellm litellm.callbacks = ["langfuse_otel"] diff --git a/eval_protocol/proxy/requirements.txt b/eval_protocol/proxy/requirements.txt deleted file mode 100644 index b395bdb0..00000000 --- a/eval_protocol/proxy/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -fastapi>=0.116.1 -uvicorn>=0.24.0 -httpx>=0.25.0 -redis>=5.0.0 -langfuse>=2.0.0 -uuid6>=2025.0.0 -PyYAML>=6.0.0 -litellm>=1.77.0 -opentelemetry-api>=1.20.0 -opentelemetry-sdk>=1.20.0 -opentelemetry-exporter-otlp>=1.20.0 diff --git a/pyproject.toml b/pyproject.toml index e5caa497..f6af71dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,6 +153,9 @@ proxy = [ "redis>=5.0.0", "langfuse>=2.0.0", "uuid6>=2025.0.0", + "opentelemetry-api>=1.20.0", + "opentelemetry-sdk>=1.20.0", + "opentelemetry-exporter-otlp>=1.20.0", ] [project.scripts] From aaf2d0e193fb7204399d46fe51a28f4164787b14 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:38:50 -0800 Subject: [PATCH 05/12] add comment --- eval_protocol/proxy/proxy_core/litellm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 7ea619db..b93d1787 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -99,6 +99,7 @@ async def handle_chat_completion( try: # Make the completion call - pass all params through + # Note: langfuse_host is set via LANGFUSE_HOST env var at startup; OTEL doesn't support per-request host override response = await acompletion( **data, metadata=litellm_metadata, From 91dc416d1af23df2004c8925ef28c212b8eb3977 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:47:24 -0800 Subject: [PATCH 06/12] update --- eval_protocol/proxy/proxy_core/litellm.py | 18 +++++++++--------- uv.lock | 6 ++++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index b93d1787..ae042e68 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -83,11 +83,8 @@ async def handle_chat_completion( ] ) - # Build Langfuse metadata (tags, trace context) + # Build Langfuse metadata (tags) litellm_metadata = {"tags": tags, **metadata} - if rollout_id is not None: - litellm_metadata["trace_id"] = rollout_id - litellm_metadata["generation_name"] = f"chat-{insertion_id}" langfuse_keys = config.langfuse_keys[project_id] @@ -108,16 +105,16 @@ async def handle_chat_completion( langfuse_secret_key=langfuse_keys["secret_key"], ) - # Register insertion_id in Redis on success - if insertion_id is not None and rollout_id is not None: - register_insertion_id(redis_client, rollout_id, insertion_id) - if is_streaming: # For 
streaming, return a StreamingResponse with SSE format + # Register insertion_id only after stream completes successfully async def stream_generator(): async for chunk in response: # type: ignore[union-attr] yield f"data: {chunk.model_dump_json()}\n\n" yield "data: [DONE]\n\n" + # Stream completed successfully - now register + if insertion_id is not None and rollout_id is not None: + register_insertion_id(redis_client, rollout_id, insertion_id) return StreamingResponse( stream_generator(), @@ -128,7 +125,10 @@ async def stream_generator(): }, ) else: - # Non-streaming: return JSON response + # Non-streaming: register insertion_id on success + if insertion_id is not None and rollout_id is not None: + register_insertion_id(redis_client, rollout_id, insertion_id) + return Response( content=response.model_dump_json(), status_code=200, diff --git a/uv.lock b/uv.lock index c175b81f..c4bd20c9 100644 --- a/uv.lock +++ b/uv.lock @@ -1265,6 +1265,9 @@ openevals = [ ] proxy = [ { name = "langfuse" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "opentelemetry-sdk" }, { name = "redis" }, { name = "uuid6" }, ] @@ -1340,6 +1343,9 @@ requires-dist = [ { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, { name = "openenv-core", marker = "extra == 'openenv'" }, { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" }, + { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" }, { name = "peewee", specifier = ">=3.18.2" }, { name = "peft", marker = "extra == 'trl'", specifier = ">=0.7.0" }, From d6acf244b2072a305511451cc98f2dd784e98bf5 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:57:51 -0800 Subject: [PATCH 07/12] nit --- eval_protocol/proxy/proxy_core/litellm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index ae042e68..979a4e95 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -91,8 +91,10 @@ async def handle_chat_completion( # Check if streaming is requested is_streaming = data.get("stream", False) - # Pop timeout to avoid duplicate kwarg - use client's if provided, else config default + # Pop fields that we pass explicitly to avoid duplicate kwarg errors request_timeout = data.pop("timeout", None) or config.request_timeout + data.pop("langfuse_public_key", None) + data.pop("langfuse_secret_key", None) try: # Make the completion call - pass all params through From 63270781bf680bbefe382f308edee3f8e90fc22c Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 18:27:40 -0800 Subject: [PATCH 08/12] update --- eval_protocol/adapters/fireworks_tracing.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/eval_protocol/adapters/fireworks_tracing.py b/eval_protocol/adapters/fireworks_tracing.py index 3c701ab2..4b0c9cb9 100644 --- a/eval_protocol/adapters/fireworks_tracing.py +++ b/eval_protocol/adapters/fireworks_tracing.py @@ -46,7 +46,7 @@ def __call__( ... 
-def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: +def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format. Args: @@ -137,9 +137,14 @@ def convert_trace_dict_to_evaluation_row( observations = trace.get("observations") or [] # We can only extract when stored in OTEL format. - openai_response = extract_openai_response(observations) - if openai_response: - choices = openai_response.get("llm.openai.choices") + otel_attributes = extract_otel_attributes(observations) + if otel_attributes: + # Find choices from any provider (llm.*.choices pattern) + choices = None + for key, value in otel_attributes.items(): + if key.endswith(".choices") and isinstance(value, list): + choices = value + break if choices and len(choices) > 0: execution_metadata.finish_reason = choices[0].get("finish_reason") From df637bf69dbdcb686f4cff491f7a6fe24ed27d78 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 18:53:11 -0800 Subject: [PATCH 09/12] add --- pyproject.toml | 13 +++--- uv.lock | 117 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 89 insertions(+), 41 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f6af71dc..bc417c40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "omegaconf>=2.3.0", "httpx>=0.24.0", "anthropic>=0.59.0", - "litellm<1.75.0", + "litellm>=1.81.0,<1.82.0", "pytest>=6.0.0", "pytest-asyncio>=0.21.0", "peewee>=3.18.2", @@ -146,16 +146,17 @@ langgraph = [ langgraph_tools = [ "langgraph>=0.6.7", "langchain>=0.3.0", - "langchain-fireworks>=0.3.0", + # langchain-fireworks removed: incompatible with fireworks-ai>=1.0.0 ] proxy = [ "redis>=5.0.0", - "langfuse>=2.0.0", + "langfuse>=2.0.0,<3.0.0", "uuid6>=2025.0.0", - "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-exporter-otlp>=1.20.0", + "litellm>=1.81.0,<1.82.0", + "opentelemetry-api>=1.29.0", + "opentelemetry-sdk>=1.29.0", + "opentelemetry-exporter-otlp>=1.29.0", ] [project.scripts] diff --git a/uv.lock b/uv.lock index c4bd20c9..977a9c07 100644 --- a/uv.lock +++ b/uv.lock @@ -1251,7 +1251,6 @@ langgraph = [ ] langgraph-tools = [ { name = "langchain" }, - { name = "langchain-fireworks" }, { name = "langgraph" }, ] langsmith = [ @@ -1265,6 +1264,7 @@ openevals = [ ] proxy = [ { name = "langfuse" }, + { name = "litellm" }, { name = "opentelemetry-api" }, { name = "opentelemetry-exporter-otlp" }, { name = "opentelemetry-sdk" }, @@ -1329,13 +1329,13 @@ requires-dist = [ { name = "langchain", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, - { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, - { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, + { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0,<3.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" }, - { name = "litellm", specifier = "<1.75.0" }, + { 
name = "litellm", specifier = ">=1.81.0,<1.82.0" }, + { name = "litellm", marker = "extra == 'proxy'", specifier = ">=1.81.0,<1.82.0" }, { name = "loguru", specifier = ">=0.6.0" }, { name = "mcp", specifier = ">=1.9.2" }, { name = "omegaconf", specifier = ">=2.3.0" }, @@ -1343,9 +1343,9 @@ requires-dist = [ { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, { name = "openenv-core", marker = "extra == 'openenv'" }, { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" }, - { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, - { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, - { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, + { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, + { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" }, { name = "peewee", specifier = ">=3.18.2" }, { name = "peft", marker = "extra == 'trl'", specifier = ">=0.7.0" }, @@ -1577,6 +1577,69 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/05/4958cccbe862958d862b6a15f2d10d2f5ec3c411268dcb131a433e5e7a0d/fastmcp-2.10.6-py3-none-any.whl", hash = "sha256:9782416a8848cc0f4cfcc578e5c17834da620bef8ecf4d0daabf5dd1272411a2", size = 202613, upload-time = "2025-07-19T20:02:11.47Z" }, ] +[[package]] +name = "fastuuid" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/b2/731a6696e37cd20eed353f69a09f37a984a43c9713764ee3f7ad5f57f7f9/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:6e6243d40f6c793c3e2ee14c13769e341b90be5ef0c23c82fa6515a96145181a", size = 516760, upload-time = "2025-10-19T22:25:21.509Z" }, + { url = "https://files.pythonhosted.org/packages/c5/79/c73c47be2a3b8734d16e628982653517f80bbe0570e27185d91af6096507/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:13ec4f2c3b04271f62be2e1ce7e95ad2dd1cf97e94503a3760db739afbd48f00", size = 264748, upload-time = "2025-10-19T22:41:52.873Z" }, + { url = "https://files.pythonhosted.org/packages/24/c5/84c1eea05977c8ba5173555b0133e3558dc628bcf868d6bf1689ff14aedc/fastuuid-0.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b2fdd48b5e4236df145a149d7125badb28e0a383372add3fbaac9a6b7a394470", size = 254537, upload-time = "2025-10-19T22:33:55.603Z" }, + { url = "https://files.pythonhosted.org/packages/0e/23/4e362367b7fa17dbed646922f216b9921efb486e7abe02147e4b917359f8/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f74631b8322d2780ebcf2d2d75d58045c3e9378625ec51865fe0b5620800c39d", size = 278994, upload-time = "2025-10-19T22:26:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/b2/72/3985be633b5a428e9eaec4287ed4b873b7c4c53a9639a8b416637223c4cd/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:83cffc144dc93eb604b87b179837f2ce2af44871a7b323f2bfed40e8acb40ba8", size = 280003, upload-time = "2025-10-19T22:23:45.415Z" }, + { url = "https://files.pythonhosted.org/packages/b3/6d/6ef192a6df34e2266d5c9deb39cd3eea986df650cbcfeaf171aa52a059c3/fastuuid-0.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a771f135ab4523eb786e95493803942a5d1fc1610915f131b363f55af53b219", size = 303583, upload-time = "2025-10-19T22:26:00.756Z" }, + { url = "https://files.pythonhosted.org/packages/9d/11/8a2ea753c68d4fece29d5d7c6f3f903948cc6e82d1823bc9f7f7c0355db3/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4edc56b877d960b4eda2c4232f953a61490c3134da94f3c28af129fb9c62a4f6", size = 460955, upload-time = "2025-10-19T22:36:25.196Z" }, + { url = "https://files.pythonhosted.org/packages/23/42/7a32c93b6ce12642d9a152ee4753a078f372c9ebb893bc489d838dd4afd5/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bcc96ee819c282e7c09b2eed2b9bd13084e3b749fdb2faf58c318d498df2efbe", size = 480763, upload-time = "2025-10-19T22:24:28.451Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e9/a5f6f686b46e3ed4ed3b93770111c233baac87dd6586a411b4988018ef1d/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7a3c0bca61eacc1843ea97b288d6789fbad7400d16db24e36a66c28c268cfe3d", size = 452613, upload-time = "2025-10-19T22:25:06.827Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c9/18abc73c9c5b7fc0e476c1733b678783b2e8a35b0be9babd423571d44e98/fastuuid-0.14.0-cp310-cp310-win32.whl", hash = "sha256:7f2f3efade4937fae4e77efae1af571902263de7b78a0aee1a1653795a093b2a", size = 155045, upload-time = "2025-10-19T22:28:32.732Z" }, + { url = "https://files.pythonhosted.org/packages/5e/8a/d9e33f4eb4d4f6d9f2c5c7d7e96b5cdbb535c93f3b1ad6acce97ee9d4bf8/fastuuid-0.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:ae64ba730d179f439b0736208b4c279b8bc9c089b102aec23f86512ea458c8a4", size = 156122, upload-time = "2025-10-19T22:23:15.59Z" }, + { url = "https://files.pythonhosted.org/packages/98/f3/12481bda4e5b6d3e698fbf525df4443cc7dce746f246b86b6fcb2fba1844/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34", size = 516386, upload-time = "2025-10-19T22:42:40.176Z" }, + { url = "https://files.pythonhosted.org/packages/59/19/2fc58a1446e4d72b655648eb0879b04e88ed6fa70d474efcf550f640f6ec/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7", size = 264569, upload-time = "2025-10-19T22:25:50.977Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/3c74756e5b02c40cfcc8b1d8b5bac4edbd532b55917a6bcc9113550e99d1/fastuuid-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1", size = 254366, upload-time = "2025-10-19T22:29:49.166Z" }, + { url = "https://files.pythonhosted.org/packages/52/96/d761da3fccfa84f0f353ce6e3eb8b7f76b3aa21fd25e1b00a19f9c80a063/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc", size = 278978, upload-time = "2025-10-19T22:35:41.306Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c2/f84c90167cc7765cb82b3ff7808057608b21c14a38531845d933a4637307/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8", size = 279692, upload-time = "2025-10-19T22:25:36.997Z" }, + { url = "https://files.pythonhosted.org/packages/af/7b/4bacd03897b88c12348e7bd77943bac32ccf80ff98100598fcff74f75f2e/fastuuid-0.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7", size = 303384, upload-time = "2025-10-19T22:29:46.578Z" }, + { url = "https://files.pythonhosted.org/packages/c0/a2/584f2c29641df8bd810d00c1f21d408c12e9ad0c0dafdb8b7b29e5ddf787/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73", size = 460921, upload-time = "2025-10-19T22:36:42.006Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/c6b77443bb7764c760e211002c8638c0c7cce11cb584927e723215ba1398/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36", size = 480575, upload-time = "2025-10-19T22:28:18.975Z" }, + { url = "https://files.pythonhosted.org/packages/5a/87/93f553111b33f9bb83145be12868c3c475bf8ea87c107063d01377cc0e8e/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94", size = 452317, upload-time = "2025-10-19T22:25:32.75Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8c/a04d486ca55b5abb7eaa65b39df8d891b7b1635b22db2163734dc273579a/fastuuid-0.14.0-cp311-cp311-win32.whl", hash = "sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24", size = 154804, upload-time = "2025-10-19T22:24:15.615Z" }, + { url = "https://files.pythonhosted.org/packages/9c/b2/2d40bf00820de94b9280366a122cbaa60090c8cf59e89ac3938cf5d75895/fastuuid-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa", size = 156099, upload-time = "2025-10-19T22:24:31.646Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" }, + { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" }, + { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" }, + { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" }, + { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" }, + { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" }, + { url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" }, + { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" }, + { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" }, + { url = "https://files.pythonhosted.org/packages/a5/83/ae12dd39b9a39b55d7f90abb8971f1a5f3c321fd72d5aa83f90dc67fe9ed/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021", size = 510720, upload-time = "2025-10-19T22:42:34.633Z" }, + { url = "https://files.pythonhosted.org/packages/53/b0/a4b03ff5d00f563cc7546b933c28cb3f2a07344b2aec5834e874f7d44143/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc", size = 262024, upload-time = "2025-10-19T22:30:25.482Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6d/64aee0a0f6a58eeabadd582e55d0d7d70258ffdd01d093b30c53d668303b/fastuuid-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5", size = 251679, upload-time = "2025-10-19T22:36:14.096Z" }, + { url = "https://files.pythonhosted.org/packages/60/f5/a7e9cda8369e4f7919d36552db9b2ae21db7915083bc6336f1b0082c8b2e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f", size = 277862, upload-time = "2025-10-19T22:36:23.302Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d3/8ce11827c783affffd5bd4d6378b28eb6cc6d2ddf41474006b8d62e7448e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87", size = 278278, upload-time = "2025-10-19T22:29:43.809Z" }, + { url = "https://files.pythonhosted.org/packages/a2/51/680fb6352d0bbade04036da46264a8001f74b7484e2fd1f4da9e3db1c666/fastuuid-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b", size = 301788, upload-time = "2025-10-19T22:36:06.825Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7c/2014b5785bd8ebdab04ec857635ebd84d5ee4950186a577db9eff0fb8ff6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022", size = 459819, upload-time = "2025-10-19T22:35:31.623Z" }, + { url = "https://files.pythonhosted.org/packages/01/d2/524d4ceeba9160e7a9bc2ea3e8f4ccf1ad78f3bde34090ca0c51f09a5e91/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995", size = 478546, upload-time = "2025-10-19T22:26:03.023Z" }, + { url = "https://files.pythonhosted.org/packages/bc/17/354d04951ce114bf4afc78e27a18cfbd6ee319ab1829c2d5fb5e94063ac6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab", size = 450921, upload-time = "2025-10-19T22:31:02.151Z" }, + { url = "https://files.pythonhosted.org/packages/fb/be/d7be8670151d16d88f15bb121c5b66cdb5ea6a0c2a362d0dcf30276ade53/fastuuid-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad", size = 154559, upload-time = "2025-10-19T22:36:36.011Z" }, + { url = "https://files.pythonhosted.org/packages/22/1d/5573ef3624ceb7abf4a46073d3554e37191c868abc3aecd5289a72f9810a/fastuuid-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed", size = 156539, upload-time = "2025-10-19T22:33:35.898Z" }, + { url = "https://files.pythonhosted.org/packages/16/c9/8c7660d1fe3862e3f8acabd9be7fc9ad71eb270f1c65cce9a2b7a31329ab/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad", size = 510600, upload-time = "2025-10-19T22:43:44.17Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f4/a989c82f9a90d0ad995aa957b3e572ebef163c5299823b4027986f133dfb/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b", size = 262069, upload-time = "2025-10-19T22:43:38.38Z" }, + { url = "https://files.pythonhosted.org/packages/da/6c/a1a24f73574ac995482b1326cf7ab41301af0fabaa3e37eeb6b3df00e6e2/fastuuid-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714", size = 251543, upload-time = "2025-10-19T22:32:22.537Z" }, + { url = "https://files.pythonhosted.org/packages/1a/20/2a9b59185ba7a6c7b37808431477c2d739fcbdabbf63e00243e37bd6bf49/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f", size = 277798, upload-time = "2025-10-19T22:33:53.821Z" }, + { url = "https://files.pythonhosted.org/packages/ef/33/4105ca574f6ded0af6a797d39add041bcfb468a1255fbbe82fcb6f592da2/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f", size = 278283, upload-time = "2025-10-19T22:29:02.812Z" }, + { url = "https://files.pythonhosted.org/packages/fe/8c/fca59f8e21c4deb013f574eae05723737ddb1d2937ce87cb2a5d20992dc3/fastuuid-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75", size = 301627, upload-time = "2025-10-19T22:35:54.985Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e2/f78c271b909c034d429218f2798ca4e89eeda7983f4257d7865976ddbb6c/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4", size = 459778, upload-time = "2025-10-19T22:28:00.999Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f0/5ff209d865897667a2ff3e7a572267a9ced8f7313919f6d6043aed8b1caa/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_i686.whl", hash = "sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad", size = 478605, upload-time = "2025-10-19T22:36:21.764Z" }, + { url = "https://files.pythonhosted.org/packages/e0/c8/2ce1c78f983a2c4987ea865d9516dbdfb141a120fd3abb977ae6f02ba7ca/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8", size = 450837, upload-time = "2025-10-19T22:34:37.178Z" }, + { url = "https://files.pythonhosted.org/packages/df/60/dad662ec9a33b4a5fe44f60699258da64172c39bd041da2994422cdc40fe/fastuuid-0.14.0-cp314-cp314-win32.whl", hash = "sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06", size = 154532, upload-time = "2025-10-19T22:35:18.217Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/da4db31001e854025ffd26bc9ba0740a9cbba2c3259695f7c5834908b336/fastuuid-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a", size = 156457, upload-time = "2025-10-19T22:33:44.579Z" }, +] + [[package]] name = "filelock" version = "3.18.0" @@ -2973,22 +3036,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e8/e7a090ebe37f2b071c64e81b99fb1273b3151ae932f560bb94c22f191cde/langchain_core-0.3.80-py3-none-any.whl", hash = "sha256:2141e3838d100d17dce2359f561ec0df52c526bae0de6d4f469f8026c5747456", size = 450786, upload-time = "2025-11-19T22:23:17.133Z" }, ] -[[package]] -name = "langchain-fireworks" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "fireworks-ai" }, - { name = "langchain-core" }, - { name = "openai" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1d/80/78ea4a04b1170cfa7564557808fd80e4c6f812cb5655c95a0374ca79c7ac/langchain_fireworks-0.3.0.tar.gz", hash = "sha256:09db8a06cd50df07068c07c4862e87d70b0da0f7d4e1b06f062c292af61c1433", size = 20900, upload-time = "2025-04-23T14:14:32.438Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/68/79696d5e1573a674141a44c9c59c04629e1ba25673d64a7b03f3843ae162/langchain_fireworks-0.3.0-py3-none-any.whl", hash = "sha256:ef2ea22f8cae3e654f0e1d3eb3a60c5fcd4a914643ab324507997f89f5831166", size = 17770, upload-time = "2025-04-23T14:14:31.373Z" }, -] - [[package]] name = "langchain-openai" version = "0.3.35" @@ -3017,22 +3064,21 @@ wheels = [ [[package]] name = "langfuse" -version = "3.2.1" +version = "2.60.10" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "anyio" }, { name = "backoff" }, { 
name = "httpx" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, - { name = "opentelemetry-sdk" }, + { name = "idna" }, { name = "packaging" }, { name = "pydantic" }, { name = "requests" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/0d/8fc51099cf337fb3b56cb7d305074bc0223c62e1ccabf80cc6285ccf5b31/langfuse-3.2.1.tar.gz", hash = "sha256:f79b0380dfcf52c7525bb5d7f8e9d8786a6fc8b37867def047bb388930a7beb3", size = 153369, upload-time = "2025-07-16T09:50:28.434Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/45/77fdf53c9e9f49bb78f72eba3f992f2f3d8343e05976aabfe1fca276a640/langfuse-2.60.10.tar.gz", hash = "sha256:a26d0d927a28ee01b2d12bb5b862590b643cc4e60a28de6e2b0c2cfff5dbfc6a", size = 152648, upload-time = "2025-09-16T15:08:12.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32", size = 299315, upload-time = "2025-07-16T09:50:26.582Z" }, + { url = "https://files.pythonhosted.org/packages/76/69/08584fbd69e14398d3932a77d0c8d7e20389da3e6470210d6719afba2801/langfuse-2.60.10-py3-none-any.whl", hash = "sha256:815c6369194aa5b2a24f88eb9952f7c3fc863272c41e90642a71f3bc76f4a11f", size = 275568, upload-time = "2025-09-16T15:08:10.166Z" }, ] [[package]] @@ -3120,11 +3166,12 @@ wheels = [ [[package]] name = "litellm" -version = "1.74.9" +version = "1.81.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "click" }, + { name = "fastuuid" }, { name = "httpx" }, { name = "importlib-metadata" }, { name = "jinja2" }, @@ -3135,9 +3182,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/5d/646bebdb4769d77e6a018b9152c9ccf17afe15d0f88974f338d3f2ee7c15/litellm-1.74.9.tar.gz", hash = "sha256:4a32eff70342e1aee4d1cbf2de2a6ed64a7c39d86345c58d4401036af018b7de", size = 9660510, upload-time = "2025-07-28T16:42:39.297Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/dd/d70835d5b231617761717cd5ba60342b677693093a71d5ce13ae9d254aee/litellm-1.81.3.tar.gz", hash = "sha256:a7688b429a88abfdd02f2a8c3158ebb5385689cfb7f9d4ac1473d018b2047e1b", size = 13612652, upload-time = "2026-01-25T02:45:58.888Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" }, + { url = "https://files.pythonhosted.org/packages/83/62/d3f53c665261fdd5bb2401246e005a4ea8194ad1c4d8c663318ae3d638bf/litellm-1.81.3-py3-none-any.whl", hash = "sha256:3f60fd8b727587952ad3dd18b68f5fed538d6f43d15bb0356f4c3a11bccb2b92", size = 11946995, upload-time = "2026-01-25T02:45:55.887Z" }, ] [[package]] @@ -3993,7 +4040,7 @@ wheels = [ [[package]] name = "openai" -version = "1.109.1" +version = "2.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -4005,9 +4052,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 
564133, upload-time = "2025-09-24T13:00:53.075Z" } +sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383, upload-time = "2026-01-09T22:10:08.603Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" }, + { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, ] [[package]] From ae682dfdcc04a05a0553a509a13b67cce6b3f673 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 19:00:03 -0800 Subject: [PATCH 10/12] pass user --- eval_protocol/proxy/proxy_core/litellm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 979a4e95..e55bfa0d 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -83,8 +83,12 @@ async def handle_chat_completion( ] ) - # Build Langfuse metadata (tags) + # Build Langfuse metadata (tags + user if present) + # Convert user_id (from preprocess hook) to trace_user_id for Langfuse + user_id = metadata.pop("user_id", None) or data.get("user") litellm_metadata = {"tags": tags, **metadata} + if user_id: + litellm_metadata["trace_user_id"] = user_id langfuse_keys = config.langfuse_keys[project_id] From f4dec1741c5a9271a1424fdee4d8b6b601bdc522 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 22:51:01 -0800 Subject: [PATCH 11/12] undo langfuse --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bc417c40..511df95e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ langgraph_tools = [ proxy = [ "redis>=5.0.0", - "langfuse>=2.0.0,<3.0.0", + "langfuse>=2.0.0", "uuid6>=2025.0.0", "litellm>=1.81.0,<1.82.0", "opentelemetry-api>=1.29.0", diff --git a/uv.lock b/uv.lock index 977a9c07..3ebbcadc 100644 --- a/uv.lock +++ b/uv.lock @@ -1330,7 +1330,7 @@ requires-dist = [ { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, - { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0,<3.0.0" }, + { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" }, From 4a3cc04cb97ffe5d4753515096fce551e5155f88 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 23:22:07 -0800 Subject: [PATCH 12/12] missing --- eval_protocol/proxy/proxy_core/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/proxy/proxy_core/models.py b/eval_protocol/proxy/proxy_core/models.py 
index bf60cf71..062a870c 100644 --- a/eval_protocol/proxy/proxy_core/models.py +++ b/eval_protocol/proxy/proxy_core/models.py @@ -72,6 +72,7 @@ class ObservationResponse(BaseModel): input: Optional[Any] = None output: Optional[Any] = None parent_observation_id: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None class TraceResponse(BaseModel):
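
A quick end-to-end sketch for anyone testing this series locally. The grounded parts come straight from the diffs: the gateway listens on port 4000, `encoded_base_url` is URL-safe base64 (patch 01 decodes it with `base64.urlsafe_b64decode`), and streaming responses are SSE lines of `data: <chunk JSON>` terminated by `data: [DONE]` (patch 02's `stream_generator`). The rest is assumed for illustration: the `/chat/completions` route path, passing the `ChatParams` fields (`rollout_id`, `encoded_base_url`) as query parameters, and the placeholder model name and API key.

```python
import base64
import json

import httpx

# Grounded: the gateway decodes this with base64.urlsafe_b64decode (patch 01).
# The upstream URL itself is just an example value.
encoded_base_url = base64.urlsafe_b64encode(
    b"https://api.fireworks.ai/inference/v1"
).decode("ascii")

payload = {
    "model": "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",  # placeholder
    "messages": [{"role": "user", "content": "Say hi"}],
    "stream": True,
}

with httpx.stream(
    "POST",
    "http://localhost:4000/chat/completions",  # route path assumed
    params={  # ChatParams field names; query-parameter transport is an assumption
        "rollout_id": "rollout-123",
        "encoded_base_url": encoded_base_url,
    },
    json=payload,
    headers={"Authorization": "Bearer <api-key>"},  # placeholder key
    timeout=300.0,  # matches the gateway's REQUEST_TIMEOUT default
) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank SSE separator lines
        body = line[len("data: "):]
        if body == "[DONE]":  # terminator emitted by stream_generator (patch 02)
            break
        chunk = json.loads(body)
        delta = chunk["choices"][0].get("delta", {}).get("content")
        if delta:
            print(delta, end="", flush=True)
```

Non-streaming calls take the same shape with `"stream"` omitted or set to `False`; the gateway then returns the `ModelResponse` as a plain JSON body.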