From 3be3dcc66746de827eec4a10e06623e91bf46d4f Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 16:35:21 -0800 Subject: [PATCH 01/12] use litellm sdk --- eval_protocol/proxy/Dockerfile.gateway | 4 +- eval_protocol/proxy/README.md | 18 +-- eval_protocol/proxy/config_no_cache.yaml | 3 +- eval_protocol/proxy/docker-compose.yml | 31 +---- eval_protocol/proxy/proxy_core/app.py | 20 +-- eval_protocol/proxy/proxy_core/litellm.py | 159 +++++++++------------- eval_protocol/proxy/proxy_core/models.py | 1 - eval_protocol/proxy/requirements.txt | 4 + 8 files changed, 89 insertions(+), 151 deletions(-) diff --git a/eval_protocol/proxy/Dockerfile.gateway b/eval_protocol/proxy/Dockerfile.gateway index a9308faa..663d3142 100644 --- a/eval_protocol/proxy/Dockerfile.gateway +++ b/eval_protocol/proxy/Dockerfile.gateway @@ -1,4 +1,4 @@ -# Metadata Extraction Gateway - Sits in front of LiteLLM +# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL FROM python:3.11-slim WORKDIR /app @@ -19,5 +19,5 @@ COPY ./proxy_core /app/proxy_core EXPOSE 4000 # Run the gateway as a module -# LITELLM_URL will be set by environment (docker-compose or Cloud Run) +# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run) CMD ["python", "-m", "proxy_core.main"] diff --git a/eval_protocol/proxy/README.md b/eval_protocol/proxy/README.md index ffcdaf25..9223bba2 100644 --- a/eval_protocol/proxy/README.md +++ b/eval_protocol/proxy/README.md @@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon - Stores insertion IDs per rollout for completeness checking - Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}` -#### 3. **LiteLLM Backend** - - Standard LiteLLM proxy for routing to LLM providers - - Configured with Langfuse callbacks for automatic tracing +#### 3. **LiteLLM SDK (Direct)** + - Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed) + - Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback ## Key Features @@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection. 
| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
-| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
| `REDIS_HOST` | Yes | - | Redis hostname |
| `REDIS_PORT` | No | 6379 | Redis port |
| `REDIS_PASSWORD` | No | - | Redis password |
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
-| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
+| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse OTEL host for tracing |
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
| `LOG_LEVEL` | No | INFO | Logging level |
| `PORT` | No | 4000 | Gateway port |
@@ -272,15 +271,14 @@ default_project_id: project-1

### LiteLLM Configuration

-The `config_no_cache.yaml` configures LiteLLM:
+The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):

```yaml
model_list:
  - model_name: "*"
    litellm_params:
      model: "*"
litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
  drop_params: True
general_settings:
  allow_client_side_credentials: true
@@ -288,9 +286,11 @@ general_settings:

Key settings:
- **Wildcard model support**: Route any model to any provider
-- **Langfuse callbacks**: Automatic tracing on success/failure
+- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
- **Client-side credentials**: Accept API keys from request body

+**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
+
## Security Considerations

### Authentication
diff --git a/eval_protocol/proxy/config_no_cache.yaml b/eval_protocol/proxy/config_no_cache.yaml
index 7adb5a72..1d772705 100644
--- a/eval_protocol/proxy/config_no_cache.yaml
+++ b/eval_protocol/proxy/config_no_cache.yaml
@@ -3,8 +3,7 @@ model_list:
     litellm_params:
       model: "*"
 litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
diff --git a/eval_protocol/proxy/docker-compose.yml b/eval_protocol/proxy/docker-compose.yml
index a6058e0e..10659634 100644
--- a/eval_protocol/proxy/docker-compose.yml
+++ b/eval_protocol/proxy/docker-compose.yml
@@ -7,41 +7,19 @@ services:
     ports:
       - "6379:6379" # Expose for debugging if needed
     networks:
-      - litellm-network
+      - proxy-network
     restart: unless-stopped
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data

-  # LiteLLM Backend - Handles actual LLM proxying
-  litellm-backend:
-    image: litellm/litellm:v1.77.3-stable
-    platform: linux/amd64
-    container_name: litellm-backend
-    command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
-    # If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in the .env file.
- env_file: - - .env # Load API keys from .env file - environment: - - LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy - - LANGFUSE_SECRET_KEY=dummy - volumes: - - ./config_no_cache.yaml:/app/config.yaml:ro - ports: - - "4001:4000" # Expose on 4001 for direct access if needed - networks: - - litellm-network - restart: unless-stopped - - # Metadata Gateway - Public-facing service that extracts metadata from URLs + # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL metadata-gateway: build: context: . dockerfile: Dockerfile.gateway container_name: metadata-gateway environment: - # Point to the LiteLLM backend service - - LITELLM_URL=http://litellm-backend:4000 - PORT=4000 # Redis configuration for assistant message counting - REDIS_HOST=redis @@ -56,14 +34,13 @@ services: ports: - "4000:4000" # Main public-facing port networks: - - litellm-network + - proxy-network depends_on: - - litellm-backend - redis restart: unless-stopped networks: - litellm-network: + proxy-network: driver: bridge volumes: diff --git a/eval_protocol/proxy/proxy_core/app.py b/eval_protocol/proxy/proxy_core/app.py index 751d5dc1..ab1bfde5 100644 --- a/eval_protocol/proxy/proxy_core/app.py +++ b/eval_protocol/proxy/proxy_core/app.py @@ -15,7 +15,7 @@ from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook from .auth import AuthProvider, NoAuthProvider -from .litellm import handle_chat_completion, proxy_to_litellm +from .litellm import handle_chat_completion from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace # Configure logging before any other imports (so all modules inherit this config) @@ -35,10 +35,6 @@ def build_proxy_config( preprocess_traces_request: Optional[TracesRequestHook] = None, ) -> ProxyConfig: """Load environment and secrets, and build ProxyConfig""" - # Env - litellm_url = os.getenv("LITELLM_URL") - if not litellm_url: - raise ValueError("LITELLM_URL environment variable must be set") request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0")) langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com") @@ -66,7 +62,6 @@ def build_proxy_config( raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}") return ProxyConfig( - litellm_url=litellm_url, request_timeout=request_timeout, langfuse_host=langfuse_host, langfuse_keys=langfuse_keys, @@ -113,6 +108,10 @@ async def lifespan(app: FastAPI): app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request) app.state.redis = init_redis() + import litellm + + litellm.callbacks = ["langfuse_otel"] + try: yield finally: @@ -297,13 +296,4 @@ async def pointwise_get_langfuse_trace( async def health(): return {"status": "healthy", "service": "metadata-proxy"} - # Catch-all - @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) - async def catch_all_proxy( - path: str, - request: Request, - config: ProxyConfig = Depends(get_config), - ): - return await proxy_to_litellm(config, path, request) - return app diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index cdd2383b..b3b5684f 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -1,20 +1,36 @@ """ -LiteLLM client - handles all communication with LiteLLM service. 
+LiteLLM client - handles all LLM calls directly via LiteLLM SDK with Langfuse OTEL integration. """ import json import base64 -import httpx import logging +import os from uuid6 import uuid7 from fastapi import Request, Response, HTTPException import redis +from litellm import acompletion + from .redis_utils import register_insertion_id from .models import ProxyConfig, ChatParams logger = logging.getLogger(__name__) +def _configure_langfuse_otel(config: ProxyConfig, project_id: str) -> None: + """Configure Langfuse OTEL credentials via environment variables.""" + public_key = config.langfuse_keys[project_id]["public_key"] + secret_key = config.langfuse_keys[project_id]["secret_key"] + + os.environ["LANGFUSE_PUBLIC_KEY"] = public_key + os.environ["LANGFUSE_SECRET_KEY"] = secret_key + os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) + + logger.info( + f"Langfuse OTEL configured: project={project_id}, host={os.environ['LANGFUSE_HOST']}, public_key={public_key[:20]}..." + ) + + async def handle_chat_completion( config: ProxyConfig, redis_client: redis.Redis, @@ -22,12 +38,12 @@ async def handle_chat_completion( params: ChatParams, ) -> Response: """ - Handle chat completion requests and forward to LiteLLM. + Handle chat completion requests using LiteLLM SDK directly with Langfuse OTEL. If metadata IDs (rollout_id, etc.) are provided, they'll be added as tags and the assistant message count will be tracked in Redis. - If encoded_base_url is provided, it will be decoded and added to the request. + If encoded_base_url is provided, it will be decoded and used as api_base. """ body = await request.body() data = json.loads(body) if body else {} @@ -50,36 +66,26 @@ async def handle_chat_completion( # Decode and add base_url if provided if encoded_base_url: try: - # Decode from URL-safe base64 decoded_bytes = base64.urlsafe_b64decode(encoded_base_url) - base_url = decoded_bytes.decode("utf-8") - data["base_url"] = base_url - logger.debug(f"Decoded base_url: {base_url}") + data["base_url"] = decoded_bytes.decode("utf-8") + logger.debug(f"Decoded base_url: {data['base_url']}") except Exception as e: logger.error(f"Failed to decode base_url: {e}") raise HTTPException(status_code=400, detail=f"Invalid encoded_base_url: {str(e)}") - # Extract API key from Authorization header and inject into request body + # Extract API key from Authorization header and add to data auth_header = request.headers.get("authorization", "") if auth_header.startswith("Bearer "): - api_key = auth_header.replace("Bearer ", "").strip() - # Only inject API key if model is a Fireworks model - model = data.get("model") - if model and isinstance(model, str) and model.startswith("fireworks_ai"): - data["api_key"] = api_key + data["api_key"] = auth_header.replace("Bearer ", "").strip() - # If metadata IDs are provided, add them as tags + # Build metadata with tags for Langfuse OTEL insertion_id = None + metadata = data.pop("metadata", {}) or {} + tags = list(metadata.pop("tags", []) or []) + if rollout_id is not None: insertion_id = str(uuid7()) - - if "metadata" not in data: - data["metadata"] = {} - if "tags" not in data["metadata"]: - data["metadata"]["tags"] = [] - - # Add extracted IDs as tags - data["metadata"]["tags"].extend( + tags.extend( [ f"rollout_id:{rollout_id}", f"insertion_id:{insertion_id}", @@ -90,84 +96,47 @@ async def handle_chat_completion( ] ) - # Add Langfuse configuration - data["langfuse_public_key"] = config.langfuse_keys[project_id]["public_key"] - data["langfuse_secret_key"] = 
config.langfuse_keys[project_id]["secret_key"] - data["langfuse_host"] = config.langfuse_host - - # Forward to LiteLLM's standard /chat/completions endpoint - # Set longer timeout for LLM API calls (LLMs can be slow) - timeout = httpx.Timeout(config.request_timeout) - async with httpx.AsyncClient(timeout=timeout) as client: - # Copy headers from original request but exclude content-length (httpx will set it correctly) - headers = dict(request.headers) - headers.pop("host", None) - headers.pop("content-length", None) # Let httpx calculate the correct length - headers["content-type"] = "application/json" - - # Forward to LiteLLM - litellm_url = f"{config.litellm_url}/chat/completions" - - response = await client.post( - litellm_url, - json=data, # httpx will serialize and set correct Content-Length - headers=headers, + # Configure Langfuse OTEL + _configure_langfuse_otel(config, project_id) + + # Build Langfuse OTEL metadata (becomes span attributes prefixed with langfuse.*) + litellm_metadata = {"tags": tags, **metadata} + if rollout_id is not None: + litellm_metadata["trace_id"] = rollout_id + litellm_metadata["generation_name"] = f"chat-{insertion_id}" + + try: + # Make the completion call - pass all params through + response = await acompletion( + **data, + metadata=litellm_metadata, + timeout=config.request_timeout, ) - # Register insertion_id in Redis only on successful response - if response.status_code == 200 and insertion_id is not None and rollout_id is not None: + # Register insertion_id in Redis on success + if insertion_id is not None and rollout_id is not None: register_insertion_id(redis_client, rollout_id, insertion_id) - # Return the response + # Convert ModelResponse to JSON return Response( - content=response.content, - status_code=response.status_code, - headers=dict(response.headers), - ) - - -async def proxy_to_litellm(config: ProxyConfig, path: str, request: Request) -> Response: - """ - Catch-all proxy: Forward any request to LiteLLM, extracting API key from Authorization header. 
- """ - # Set longer timeout for LLM API calls (LLMs can be slow) - timeout = httpx.Timeout(config.request_timeout) - async with httpx.AsyncClient(timeout=timeout) as client: - # Copy headers - headers = dict(request.headers) - headers.pop("host", None) - headers.pop("content-length", None) - - # Get body - body = await request.body() - - # Pass through API key from Authorization header - if request.method in ["POST", "PUT", "PATCH"] and body: - try: - data = json.loads(body) - - auth_header = request.headers.get("authorization", "") - if auth_header.startswith("Bearer "): - api_key = auth_header.replace("Bearer ", "").strip() - data["api_key"] = api_key - - # Re-serialize - body = json.dumps(data).encode() - except json.JSONDecodeError: - pass - - # Forward to LiteLLM - litellm_url = f"{config.litellm_url}/{path}" - - response = await client.request( - method=request.method, - url=litellm_url, - headers=headers, - content=body, + content=response.model_dump_json(), + status_code=200, + media_type="application/json", ) + except HTTPException: + raise + except Exception as e: + logger.error(f"LiteLLM error: {e}", exc_info=True) return Response( - content=response.content, - status_code=response.status_code, - headers=dict(response.headers), + content=json.dumps( + { + "error": { + "message": str(e), + "type": type(e).__name__, + } + } + ), + status_code=500, + media_type="application/json", ) diff --git a/eval_protocol/proxy/proxy_core/models.py b/eval_protocol/proxy/proxy_core/models.py index f3b5e614..bf60cf71 100644 --- a/eval_protocol/proxy/proxy_core/models.py +++ b/eval_protocol/proxy/proxy_core/models.py @@ -53,7 +53,6 @@ class TracesParams(BaseModel): class ProxyConfig(BaseModel): """Configuration model for the LiteLLM Metadata Proxy""" - litellm_url: str request_timeout: float = 300.0 langfuse_host: str langfuse_keys: Dict[str, Dict[str, str]] diff --git a/eval_protocol/proxy/requirements.txt b/eval_protocol/proxy/requirements.txt index 15d21d0b..b395bdb0 100644 --- a/eval_protocol/proxy/requirements.txt +++ b/eval_protocol/proxy/requirements.txt @@ -5,3 +5,7 @@ redis>=5.0.0 langfuse>=2.0.0 uuid6>=2025.0.0 PyYAML>=6.0.0 +litellm>=1.77.0 +opentelemetry-api>=1.20.0 +opentelemetry-sdk>=1.20.0 +opentelemetry-exporter-otlp>=1.20.0 From 68bfdc67eaa5eb5b86f9dfd9e2a5bf465546446e Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:17:25 -0800 Subject: [PATCH 02/12] address comments --- eval_protocol/proxy/proxy_core/litellm.py | 80 ++++++++++++----------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index b3b5684f..98334861 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -5,10 +5,11 @@ import json import base64 import logging -import os from uuid6 import uuid7 from fastapi import Request, Response, HTTPException +from fastapi.responses import StreamingResponse import redis +import openai from litellm import acompletion from .redis_utils import register_insertion_id @@ -17,20 +18,6 @@ logger = logging.getLogger(__name__) -def _configure_langfuse_otel(config: ProxyConfig, project_id: str) -> None: - """Configure Langfuse OTEL credentials via environment variables.""" - public_key = config.langfuse_keys[project_id]["public_key"] - secret_key = config.langfuse_keys[project_id]["secret_key"] - - os.environ["LANGFUSE_PUBLIC_KEY"] = public_key - os.environ["LANGFUSE_SECRET_KEY"] = secret_key - 
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) - - logger.info( - f"Langfuse OTEL configured: project={project_id}, host={os.environ['LANGFUSE_HOST']}, public_key={public_key[:20]}..." - ) - - async def handle_chat_completion( config: ProxyConfig, redis_client: redis.Redis, @@ -78,7 +65,7 @@ async def handle_chat_completion( if auth_header.startswith("Bearer "): data["api_key"] = auth_header.replace("Bearer ", "").strip() - # Build metadata with tags for Langfuse OTEL + # Build metadata with tags for Langfuse insertion_id = None metadata = data.pop("metadata", {}) or {} tags = list(metadata.pop("tags", []) or []) @@ -96,47 +83,62 @@ async def handle_chat_completion( ] ) - # Configure Langfuse OTEL - _configure_langfuse_otel(config, project_id) - - # Build Langfuse OTEL metadata (becomes span attributes prefixed with langfuse.*) + # Build Langfuse metadata (tags, trace context) litellm_metadata = {"tags": tags, **metadata} if rollout_id is not None: litellm_metadata["trace_id"] = rollout_id litellm_metadata["generation_name"] = f"chat-{insertion_id}" + langfuse_keys = config.langfuse_keys[project_id] + + # Check if streaming is requested + is_streaming = data.get("stream", False) + try: # Make the completion call - pass all params through response = await acompletion( **data, metadata=litellm_metadata, timeout=config.request_timeout, + langfuse_public_key=langfuse_keys["public_key"], + langfuse_secret_key=langfuse_keys["secret_key"], ) # Register insertion_id in Redis on success if insertion_id is not None and rollout_id is not None: register_insertion_id(redis_client, rollout_id, insertion_id) - # Convert ModelResponse to JSON - return Response( - content=response.model_dump_json(), - status_code=200, - media_type="application/json", - ) + if is_streaming: + # For streaming, return a StreamingResponse with SSE format + async def stream_generator(): + async for chunk in response: # type: ignore[union-attr] + yield f"data: {chunk.model_dump_json()}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse( + stream_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + else: + # Non-streaming: return JSON response + return Response( + content=response.model_dump_json(), + status_code=200, + media_type="application/json", + ) except HTTPException: raise - except Exception as e: - logger.error(f"LiteLLM error: {e}", exc_info=True) - return Response( - content=json.dumps( - { - "error": { - "message": str(e), - "type": type(e).__name__, - } - } - ), - status_code=500, - media_type="application/json", + except openai.APIError as e: + # Convert to HTTPException and let FastAPI handle it + raise HTTPException( + status_code=getattr(e, "status_code", 500), + detail=str(e), ) + except Exception as e: + logger.error(f"Unexpected error: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) From 7ff50dbbdb87ce47f867038364c4040fffebf3a6 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:20:05 -0800 Subject: [PATCH 03/12] another fix --- eval_protocol/proxy/proxy_core/litellm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 98334861..7ea619db 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -94,12 +94,15 @@ async def handle_chat_completion( # Check if streaming is requested is_streaming = data.get("stream", 
False) + # Pop timeout to avoid duplicate kwarg - use client's if provided, else config default + request_timeout = data.pop("timeout", None) or config.request_timeout + try: # Make the completion call - pass all params through response = await acompletion( **data, metadata=litellm_metadata, - timeout=config.request_timeout, + timeout=request_timeout, langfuse_public_key=langfuse_keys["public_key"], langfuse_secret_key=langfuse_keys["secret_key"], ) From c6b0a36b18b9059d87f2117c8008a33337b7abc0 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:36:18 -0800 Subject: [PATCH 04/12] unify packages and also set keys when initializing --- eval_protocol/proxy/Dockerfile.gateway | 14 ++++++++------ eval_protocol/proxy/docker-compose.yml | 4 ++-- eval_protocol/proxy/proxy_core/app.py | 6 ++++++ eval_protocol/proxy/requirements.txt | 11 ----------- pyproject.toml | 3 +++ 5 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 eval_protocol/proxy/requirements.txt diff --git a/eval_protocol/proxy/Dockerfile.gateway b/eval_protocol/proxy/Dockerfile.gateway index 663d3142..7fc41f46 100644 --- a/eval_protocol/proxy/Dockerfile.gateway +++ b/eval_protocol/proxy/Dockerfile.gateway @@ -6,14 +6,16 @@ WORKDIR /app # Prevent Python from buffering stdout/stderr ENV PYTHONUNBUFFERED=1 -# Copy requirements file -COPY ./requirements.txt /app/requirements.txt +# Copy the entire package for local install (context is repo root) +COPY pyproject.toml /app/pyproject.toml +COPY eval_protocol /app/eval_protocol +COPY README.md /app/README.md -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt +# Install from local source with proxy extras +RUN pip install --no-cache-dir ".[proxy]" -# Copy the proxy package -COPY ./proxy_core /app/proxy_core +# Copy the proxy package (local overrides for main.py, auth.py, etc.) +COPY eval_protocol/proxy/proxy_core /app/proxy_core # Expose port EXPOSE 4000 diff --git a/eval_protocol/proxy/docker-compose.yml b/eval_protocol/proxy/docker-compose.yml index 10659634..0983e2eb 100644 --- a/eval_protocol/proxy/docker-compose.yml +++ b/eval_protocol/proxy/docker-compose.yml @@ -16,8 +16,8 @@ services: # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL metadata-gateway: build: - context: . - dockerfile: Dockerfile.gateway + context: ../.. 
+ dockerfile: eval_protocol/proxy/Dockerfile.gateway container_name: metadata-gateway environment: - PORT=4000 diff --git a/eval_protocol/proxy/proxy_core/app.py b/eval_protocol/proxy/proxy_core/app.py index ab1bfde5..633df539 100644 --- a/eval_protocol/proxy/proxy_core/app.py +++ b/eval_protocol/proxy/proxy_core/app.py @@ -108,6 +108,12 @@ async def lifespan(app: FastAPI): app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request) app.state.redis = init_redis() + config = app.state.config + default_keys = config.langfuse_keys[config.default_project_id] + os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"] + os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"] + os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host) + import litellm litellm.callbacks = ["langfuse_otel"] diff --git a/eval_protocol/proxy/requirements.txt b/eval_protocol/proxy/requirements.txt deleted file mode 100644 index b395bdb0..00000000 --- a/eval_protocol/proxy/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -fastapi>=0.116.1 -uvicorn>=0.24.0 -httpx>=0.25.0 -redis>=5.0.0 -langfuse>=2.0.0 -uuid6>=2025.0.0 -PyYAML>=6.0.0 -litellm>=1.77.0 -opentelemetry-api>=1.20.0 -opentelemetry-sdk>=1.20.0 -opentelemetry-exporter-otlp>=1.20.0 diff --git a/pyproject.toml b/pyproject.toml index e5caa497..f6af71dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,6 +153,9 @@ proxy = [ "redis>=5.0.0", "langfuse>=2.0.0", "uuid6>=2025.0.0", + "opentelemetry-api>=1.20.0", + "opentelemetry-sdk>=1.20.0", + "opentelemetry-exporter-otlp>=1.20.0", ] [project.scripts] From aaf2d0e193fb7204399d46fe51a28f4164787b14 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:38:50 -0800 Subject: [PATCH 05/12] add comment --- eval_protocol/proxy/proxy_core/litellm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 7ea619db..b93d1787 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -99,6 +99,7 @@ async def handle_chat_completion( try: # Make the completion call - pass all params through + # Note: langfuse_host is set via LANGFUSE_HOST env var at startup; OTEL doesn't support per-request host override response = await acompletion( **data, metadata=litellm_metadata, From 91dc416d1af23df2004c8925ef28c212b8eb3977 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:47:24 -0800 Subject: [PATCH 06/12] update --- eval_protocol/proxy/proxy_core/litellm.py | 18 +++++++++--------- uv.lock | 6 ++++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index b93d1787..ae042e68 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -83,11 +83,8 @@ async def handle_chat_completion( ] ) - # Build Langfuse metadata (tags, trace context) + # Build Langfuse metadata (tags) litellm_metadata = {"tags": tags, **metadata} - if rollout_id is not None: - litellm_metadata["trace_id"] = rollout_id - litellm_metadata["generation_name"] = f"chat-{insertion_id}" langfuse_keys = config.langfuse_keys[project_id] @@ -108,16 +105,16 @@ async def handle_chat_completion( langfuse_secret_key=langfuse_keys["secret_key"], ) - # Register insertion_id in Redis on success - if insertion_id is not None and rollout_id is not None: - register_insertion_id(redis_client, rollout_id, insertion_id) - if is_streaming: # For 
streaming, return a StreamingResponse with SSE format + # Register insertion_id only after stream completes successfully async def stream_generator(): async for chunk in response: # type: ignore[union-attr] yield f"data: {chunk.model_dump_json()}\n\n" yield "data: [DONE]\n\n" + # Stream completed successfully - now register + if insertion_id is not None and rollout_id is not None: + register_insertion_id(redis_client, rollout_id, insertion_id) return StreamingResponse( stream_generator(), @@ -128,7 +125,10 @@ async def stream_generator(): }, ) else: - # Non-streaming: return JSON response + # Non-streaming: register insertion_id on success + if insertion_id is not None and rollout_id is not None: + register_insertion_id(redis_client, rollout_id, insertion_id) + return Response( content=response.model_dump_json(), status_code=200, diff --git a/uv.lock b/uv.lock index c175b81f..c4bd20c9 100644 --- a/uv.lock +++ b/uv.lock @@ -1265,6 +1265,9 @@ openevals = [ ] proxy = [ { name = "langfuse" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "opentelemetry-sdk" }, { name = "redis" }, { name = "uuid6" }, ] @@ -1340,6 +1343,9 @@ requires-dist = [ { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, { name = "openenv-core", marker = "extra == 'openenv'" }, { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" }, + { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" }, { name = "peewee", specifier = ">=3.18.2" }, { name = "peft", marker = "extra == 'trl'", specifier = ">=0.7.0" }, From d6acf244b2072a305511451cc98f2dd784e98bf5 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 17:57:51 -0800 Subject: [PATCH 07/12] nit --- eval_protocol/proxy/proxy_core/litellm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index ae042e68..979a4e95 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -91,8 +91,10 @@ async def handle_chat_completion( # Check if streaming is requested is_streaming = data.get("stream", False) - # Pop timeout to avoid duplicate kwarg - use client's if provided, else config default + # Pop fields that we pass explicitly to avoid duplicate kwarg errors request_timeout = data.pop("timeout", None) or config.request_timeout + data.pop("langfuse_public_key", None) + data.pop("langfuse_secret_key", None) try: # Make the completion call - pass all params through From 63270781bf680bbefe382f308edee3f8e90fc22c Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 18:27:40 -0800 Subject: [PATCH 08/12] update --- eval_protocol/adapters/fireworks_tracing.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/eval_protocol/adapters/fireworks_tracing.py b/eval_protocol/adapters/fireworks_tracing.py index 3c701ab2..4b0c9cb9 100644 --- a/eval_protocol/adapters/fireworks_tracing.py +++ b/eval_protocol/adapters/fireworks_tracing.py @@ -46,7 +46,7 @@ def __call__( ... 
-def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: +def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format. Args: @@ -137,9 +137,14 @@ def convert_trace_dict_to_evaluation_row( observations = trace.get("observations") or [] # We can only extract when stored in OTEL format. - openai_response = extract_openai_response(observations) - if openai_response: - choices = openai_response.get("llm.openai.choices") + otel_attributes = extract_otel_attributes(observations) + if otel_attributes: + # Find choices from any provider (llm.*.choices pattern) + choices = None + for key, value in otel_attributes.items(): + if key.endswith(".choices") and isinstance(value, list): + choices = value + break if choices and len(choices) > 0: execution_metadata.finish_reason = choices[0].get("finish_reason") From df637bf69dbdcb686f4cff491f7a6fe24ed27d78 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 18:53:11 -0800 Subject: [PATCH 09/12] add --- pyproject.toml | 13 +++--- uv.lock | 117 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 89 insertions(+), 41 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f6af71dc..bc417c40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "omegaconf>=2.3.0", "httpx>=0.24.0", "anthropic>=0.59.0", - "litellm<1.75.0", + "litellm>=1.81.0,<1.82.0", "pytest>=6.0.0", "pytest-asyncio>=0.21.0", "peewee>=3.18.2", @@ -146,16 +146,17 @@ langgraph = [ langgraph_tools = [ "langgraph>=0.6.7", "langchain>=0.3.0", - "langchain-fireworks>=0.3.0", + # langchain-fireworks removed: incompatible with fireworks-ai>=1.0.0 ] proxy = [ "redis>=5.0.0", - "langfuse>=2.0.0", + "langfuse>=2.0.0,<3.0.0", "uuid6>=2025.0.0", - "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-exporter-otlp>=1.20.0", + "litellm>=1.81.0,<1.82.0", + "opentelemetry-api>=1.29.0", + "opentelemetry-sdk>=1.29.0", + "opentelemetry-exporter-otlp>=1.29.0", ] [project.scripts] diff --git a/uv.lock b/uv.lock index c4bd20c9..977a9c07 100644 --- a/uv.lock +++ b/uv.lock @@ -1251,7 +1251,6 @@ langgraph = [ ] langgraph-tools = [ { name = "langchain" }, - { name = "langchain-fireworks" }, { name = "langgraph" }, ] langsmith = [ @@ -1265,6 +1264,7 @@ openevals = [ ] proxy = [ { name = "langfuse" }, + { name = "litellm" }, { name = "opentelemetry-api" }, { name = "opentelemetry-exporter-otlp" }, { name = "opentelemetry-sdk" }, @@ -1329,13 +1329,13 @@ requires-dist = [ { name = "langchain", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, - { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, - { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, + { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0,<3.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" }, - { name = "litellm", specifier = "<1.75.0" }, + { 
name = "litellm", specifier = ">=1.81.0,<1.82.0" }, + { name = "litellm", marker = "extra == 'proxy'", specifier = ">=1.81.0,<1.82.0" }, { name = "loguru", specifier = ">=0.6.0" }, { name = "mcp", specifier = ">=1.9.2" }, { name = "omegaconf", specifier = ">=2.3.0" }, @@ -1343,9 +1343,9 @@ requires-dist = [ { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, { name = "openenv-core", marker = "extra == 'openenv'" }, { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" }, - { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, - { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, - { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.20.0" }, + { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, + { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, + { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.29.0" }, { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" }, { name = "peewee", specifier = ">=3.18.2" }, { name = "peft", marker = "extra == 'trl'", specifier = ">=0.7.0" }, @@ -1577,6 +1577,69 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/05/4958cccbe862958d862b6a15f2d10d2f5ec3c411268dcb131a433e5e7a0d/fastmcp-2.10.6-py3-none-any.whl", hash = "sha256:9782416a8848cc0f4cfcc578e5c17834da620bef8ecf4d0daabf5dd1272411a2", size = 202613, upload-time = "2025-07-19T20:02:11.47Z" }, ] +[[package]] +name = "fastuuid" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/b2/731a6696e37cd20eed353f69a09f37a984a43c9713764ee3f7ad5f57f7f9/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:6e6243d40f6c793c3e2ee14c13769e341b90be5ef0c23c82fa6515a96145181a", size = 516760, upload-time = "2025-10-19T22:25:21.509Z" }, + { url = "https://files.pythonhosted.org/packages/c5/79/c73c47be2a3b8734d16e628982653517f80bbe0570e27185d91af6096507/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:13ec4f2c3b04271f62be2e1ce7e95ad2dd1cf97e94503a3760db739afbd48f00", size = 264748, upload-time = "2025-10-19T22:41:52.873Z" }, + { url = "https://files.pythonhosted.org/packages/24/c5/84c1eea05977c8ba5173555b0133e3558dc628bcf868d6bf1689ff14aedc/fastuuid-0.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b2fdd48b5e4236df145a149d7125badb28e0a383372add3fbaac9a6b7a394470", size = 254537, upload-time = "2025-10-19T22:33:55.603Z" }, + { url = "https://files.pythonhosted.org/packages/0e/23/4e362367b7fa17dbed646922f216b9921efb486e7abe02147e4b917359f8/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f74631b8322d2780ebcf2d2d75d58045c3e9378625ec51865fe0b5620800c39d", size = 278994, upload-time = "2025-10-19T22:26:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/b2/72/3985be633b5a428e9eaec4287ed4b873b7c4c53a9639a8b416637223c4cd/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:83cffc144dc93eb604b87b179837f2ce2af44871a7b323f2bfed40e8acb40ba8", size = 280003, upload-time = "2025-10-19T22:23:45.415Z" }, + { url = "https://files.pythonhosted.org/packages/b3/6d/6ef192a6df34e2266d5c9deb39cd3eea986df650cbcfeaf171aa52a059c3/fastuuid-0.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a771f135ab4523eb786e95493803942a5d1fc1610915f131b363f55af53b219", size = 303583, upload-time = "2025-10-19T22:26:00.756Z" }, + { url = "https://files.pythonhosted.org/packages/9d/11/8a2ea753c68d4fece29d5d7c6f3f903948cc6e82d1823bc9f7f7c0355db3/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4edc56b877d960b4eda2c4232f953a61490c3134da94f3c28af129fb9c62a4f6", size = 460955, upload-time = "2025-10-19T22:36:25.196Z" }, + { url = "https://files.pythonhosted.org/packages/23/42/7a32c93b6ce12642d9a152ee4753a078f372c9ebb893bc489d838dd4afd5/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bcc96ee819c282e7c09b2eed2b9bd13084e3b749fdb2faf58c318d498df2efbe", size = 480763, upload-time = "2025-10-19T22:24:28.451Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e9/a5f6f686b46e3ed4ed3b93770111c233baac87dd6586a411b4988018ef1d/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7a3c0bca61eacc1843ea97b288d6789fbad7400d16db24e36a66c28c268cfe3d", size = 452613, upload-time = "2025-10-19T22:25:06.827Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c9/18abc73c9c5b7fc0e476c1733b678783b2e8a35b0be9babd423571d44e98/fastuuid-0.14.0-cp310-cp310-win32.whl", hash = "sha256:7f2f3efade4937fae4e77efae1af571902263de7b78a0aee1a1653795a093b2a", size = 155045, upload-time = "2025-10-19T22:28:32.732Z" }, + { url = "https://files.pythonhosted.org/packages/5e/8a/d9e33f4eb4d4f6d9f2c5c7d7e96b5cdbb535c93f3b1ad6acce97ee9d4bf8/fastuuid-0.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:ae64ba730d179f439b0736208b4c279b8bc9c089b102aec23f86512ea458c8a4", size = 156122, upload-time = "2025-10-19T22:23:15.59Z" }, + { url = "https://files.pythonhosted.org/packages/98/f3/12481bda4e5b6d3e698fbf525df4443cc7dce746f246b86b6fcb2fba1844/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34", size = 516386, upload-time = "2025-10-19T22:42:40.176Z" }, + { url = "https://files.pythonhosted.org/packages/59/19/2fc58a1446e4d72b655648eb0879b04e88ed6fa70d474efcf550f640f6ec/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7", size = 264569, upload-time = "2025-10-19T22:25:50.977Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/3c74756e5b02c40cfcc8b1d8b5bac4edbd532b55917a6bcc9113550e99d1/fastuuid-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1", size = 254366, upload-time = "2025-10-19T22:29:49.166Z" }, + { url = "https://files.pythonhosted.org/packages/52/96/d761da3fccfa84f0f353ce6e3eb8b7f76b3aa21fd25e1b00a19f9c80a063/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc", size = 278978, upload-time = "2025-10-19T22:35:41.306Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c2/f84c90167cc7765cb82b3ff7808057608b21c14a38531845d933a4637307/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8", size = 279692, upload-time = "2025-10-19T22:25:36.997Z" }, + { url = "https://files.pythonhosted.org/packages/af/7b/4bacd03897b88c12348e7bd77943bac32ccf80ff98100598fcff74f75f2e/fastuuid-0.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7", size = 303384, upload-time = "2025-10-19T22:29:46.578Z" }, + { url = "https://files.pythonhosted.org/packages/c0/a2/584f2c29641df8bd810d00c1f21d408c12e9ad0c0dafdb8b7b29e5ddf787/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73", size = 460921, upload-time = "2025-10-19T22:36:42.006Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/c6b77443bb7764c760e211002c8638c0c7cce11cb584927e723215ba1398/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36", size = 480575, upload-time = "2025-10-19T22:28:18.975Z" }, + { url = "https://files.pythonhosted.org/packages/5a/87/93f553111b33f9bb83145be12868c3c475bf8ea87c107063d01377cc0e8e/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94", size = 452317, upload-time = "2025-10-19T22:25:32.75Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8c/a04d486ca55b5abb7eaa65b39df8d891b7b1635b22db2163734dc273579a/fastuuid-0.14.0-cp311-cp311-win32.whl", hash = "sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24", size = 154804, upload-time = "2025-10-19T22:24:15.615Z" }, + { url = "https://files.pythonhosted.org/packages/9c/b2/2d40bf00820de94b9280366a122cbaa60090c8cf59e89ac3938cf5d75895/fastuuid-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa", size = 156099, upload-time = "2025-10-19T22:24:31.646Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" }, + { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" }, + { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" }, + { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" }, + { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" }, + { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" }, + { url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" }, + { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" }, + { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" }, + { url = "https://files.pythonhosted.org/packages/a5/83/ae12dd39b9a39b55d7f90abb8971f1a5f3c321fd72d5aa83f90dc67fe9ed/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021", size = 510720, upload-time = "2025-10-19T22:42:34.633Z" }, + { url = "https://files.pythonhosted.org/packages/53/b0/a4b03ff5d00f563cc7546b933c28cb3f2a07344b2aec5834e874f7d44143/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc", size = 262024, upload-time = "2025-10-19T22:30:25.482Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6d/64aee0a0f6a58eeabadd582e55d0d7d70258ffdd01d093b30c53d668303b/fastuuid-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5", size = 251679, upload-time = "2025-10-19T22:36:14.096Z" }, + { url = "https://files.pythonhosted.org/packages/60/f5/a7e9cda8369e4f7919d36552db9b2ae21db7915083bc6336f1b0082c8b2e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f", size = 277862, upload-time = "2025-10-19T22:36:23.302Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d3/8ce11827c783affffd5bd4d6378b28eb6cc6d2ddf41474006b8d62e7448e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87", size = 278278, upload-time = "2025-10-19T22:29:43.809Z" }, + { url = "https://files.pythonhosted.org/packages/a2/51/680fb6352d0bbade04036da46264a8001f74b7484e2fd1f4da9e3db1c666/fastuuid-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b", size = 301788, upload-time = "2025-10-19T22:36:06.825Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7c/2014b5785bd8ebdab04ec857635ebd84d5ee4950186a577db9eff0fb8ff6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022", size = 459819, upload-time = "2025-10-19T22:35:31.623Z" }, + { url = "https://files.pythonhosted.org/packages/01/d2/524d4ceeba9160e7a9bc2ea3e8f4ccf1ad78f3bde34090ca0c51f09a5e91/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995", size = 478546, upload-time = "2025-10-19T22:26:03.023Z" }, + { url = "https://files.pythonhosted.org/packages/bc/17/354d04951ce114bf4afc78e27a18cfbd6ee319ab1829c2d5fb5e94063ac6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab", size = 450921, upload-time = "2025-10-19T22:31:02.151Z" }, + { url = "https://files.pythonhosted.org/packages/fb/be/d7be8670151d16d88f15bb121c5b66cdb5ea6a0c2a362d0dcf30276ade53/fastuuid-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad", size = 154559, upload-time = "2025-10-19T22:36:36.011Z" }, + { url = "https://files.pythonhosted.org/packages/22/1d/5573ef3624ceb7abf4a46073d3554e37191c868abc3aecd5289a72f9810a/fastuuid-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed", size = 156539, upload-time = "2025-10-19T22:33:35.898Z" }, + { url = "https://files.pythonhosted.org/packages/16/c9/8c7660d1fe3862e3f8acabd9be7fc9ad71eb270f1c65cce9a2b7a31329ab/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad", size = 510600, upload-time = "2025-10-19T22:43:44.17Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f4/a989c82f9a90d0ad995aa957b3e572ebef163c5299823b4027986f133dfb/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b", size = 262069, upload-time = "2025-10-19T22:43:38.38Z" }, + { url = "https://files.pythonhosted.org/packages/da/6c/a1a24f73574ac995482b1326cf7ab41301af0fabaa3e37eeb6b3df00e6e2/fastuuid-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714", size = 251543, upload-time = "2025-10-19T22:32:22.537Z" }, + { url = "https://files.pythonhosted.org/packages/1a/20/2a9b59185ba7a6c7b37808431477c2d739fcbdabbf63e00243e37bd6bf49/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f", size = 277798, upload-time = "2025-10-19T22:33:53.821Z" }, + { url = "https://files.pythonhosted.org/packages/ef/33/4105ca574f6ded0af6a797d39add041bcfb468a1255fbbe82fcb6f592da2/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f", size = 278283, upload-time = "2025-10-19T22:29:02.812Z" }, + { url = "https://files.pythonhosted.org/packages/fe/8c/fca59f8e21c4deb013f574eae05723737ddb1d2937ce87cb2a5d20992dc3/fastuuid-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75", size = 301627, upload-time = "2025-10-19T22:35:54.985Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e2/f78c271b909c034d429218f2798ca4e89eeda7983f4257d7865976ddbb6c/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4", size = 459778, upload-time = "2025-10-19T22:28:00.999Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f0/5ff209d865897667a2ff3e7a572267a9ced8f7313919f6d6043aed8b1caa/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_i686.whl", hash = "sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad", size = 478605, upload-time = "2025-10-19T22:36:21.764Z" }, + { url = "https://files.pythonhosted.org/packages/e0/c8/2ce1c78f983a2c4987ea865d9516dbdfb141a120fd3abb977ae6f02ba7ca/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8", size = 450837, upload-time = "2025-10-19T22:34:37.178Z" }, + { url = "https://files.pythonhosted.org/packages/df/60/dad662ec9a33b4a5fe44f60699258da64172c39bd041da2994422cdc40fe/fastuuid-0.14.0-cp314-cp314-win32.whl", hash = "sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06", size = 154532, upload-time = "2025-10-19T22:35:18.217Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/da4db31001e854025ffd26bc9ba0740a9cbba2c3259695f7c5834908b336/fastuuid-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a", size = 156457, upload-time = "2025-10-19T22:33:44.579Z" }, +] + [[package]] name = "filelock" version = "3.18.0" @@ -2973,22 +3036,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e8/e7a090ebe37f2b071c64e81b99fb1273b3151ae932f560bb94c22f191cde/langchain_core-0.3.80-py3-none-any.whl", hash = "sha256:2141e3838d100d17dce2359f561ec0df52c526bae0de6d4f469f8026c5747456", size = 450786, upload-time = "2025-11-19T22:23:17.133Z" }, ] -[[package]] -name = "langchain-fireworks" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "fireworks-ai" }, - { name = "langchain-core" }, - { name = "openai" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1d/80/78ea4a04b1170cfa7564557808fd80e4c6f812cb5655c95a0374ca79c7ac/langchain_fireworks-0.3.0.tar.gz", hash = "sha256:09db8a06cd50df07068c07c4862e87d70b0da0f7d4e1b06f062c292af61c1433", size = 20900, upload-time = "2025-04-23T14:14:32.438Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/68/79696d5e1573a674141a44c9c59c04629e1ba25673d64a7b03f3843ae162/langchain_fireworks-0.3.0-py3-none-any.whl", hash = "sha256:ef2ea22f8cae3e654f0e1d3eb3a60c5fcd4a914643ab324507997f89f5831166", size = 17770, upload-time = "2025-04-23T14:14:31.373Z" }, -] - [[package]] name = "langchain-openai" version = "0.3.35" @@ -3017,22 +3064,21 @@ wheels = [ [[package]] name = "langfuse" -version = "3.2.1" +version = "2.60.10" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "anyio" }, { name = "backoff" }, { 
name = "httpx" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, - { name = "opentelemetry-sdk" }, + { name = "idna" }, { name = "packaging" }, { name = "pydantic" }, { name = "requests" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/0d/8fc51099cf337fb3b56cb7d305074bc0223c62e1ccabf80cc6285ccf5b31/langfuse-3.2.1.tar.gz", hash = "sha256:f79b0380dfcf52c7525bb5d7f8e9d8786a6fc8b37867def047bb388930a7beb3", size = 153369, upload-time = "2025-07-16T09:50:28.434Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/45/77fdf53c9e9f49bb78f72eba3f992f2f3d8343e05976aabfe1fca276a640/langfuse-2.60.10.tar.gz", hash = "sha256:a26d0d927a28ee01b2d12bb5b862590b643cc4e60a28de6e2b0c2cfff5dbfc6a", size = 152648, upload-time = "2025-09-16T15:08:12.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32", size = 299315, upload-time = "2025-07-16T09:50:26.582Z" }, + { url = "https://files.pythonhosted.org/packages/76/69/08584fbd69e14398d3932a77d0c8d7e20389da3e6470210d6719afba2801/langfuse-2.60.10-py3-none-any.whl", hash = "sha256:815c6369194aa5b2a24f88eb9952f7c3fc863272c41e90642a71f3bc76f4a11f", size = 275568, upload-time = "2025-09-16T15:08:10.166Z" }, ] [[package]] @@ -3120,11 +3166,12 @@ wheels = [ [[package]] name = "litellm" -version = "1.74.9" +version = "1.81.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "click" }, + { name = "fastuuid" }, { name = "httpx" }, { name = "importlib-metadata" }, { name = "jinja2" }, @@ -3135,9 +3182,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/5d/646bebdb4769d77e6a018b9152c9ccf17afe15d0f88974f338d3f2ee7c15/litellm-1.74.9.tar.gz", hash = "sha256:4a32eff70342e1aee4d1cbf2de2a6ed64a7c39d86345c58d4401036af018b7de", size = 9660510, upload-time = "2025-07-28T16:42:39.297Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/dd/d70835d5b231617761717cd5ba60342b677693093a71d5ce13ae9d254aee/litellm-1.81.3.tar.gz", hash = "sha256:a7688b429a88abfdd02f2a8c3158ebb5385689cfb7f9d4ac1473d018b2047e1b", size = 13612652, upload-time = "2026-01-25T02:45:58.888Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" }, + { url = "https://files.pythonhosted.org/packages/83/62/d3f53c665261fdd5bb2401246e005a4ea8194ad1c4d8c663318ae3d638bf/litellm-1.81.3-py3-none-any.whl", hash = "sha256:3f60fd8b727587952ad3dd18b68f5fed538d6f43d15bb0356f4c3a11bccb2b92", size = 11946995, upload-time = "2026-01-25T02:45:55.887Z" }, ] [[package]] @@ -3993,7 +4040,7 @@ wheels = [ [[package]] name = "openai" -version = "1.109.1" +version = "2.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -4005,9 +4052,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 
564133, upload-time = "2025-09-24T13:00:53.075Z" } +sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383, upload-time = "2026-01-09T22:10:08.603Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" }, + { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, ] [[package]] From ae682dfdcc04a05a0553a509a13b67cce6b3f673 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 19:00:03 -0800 Subject: [PATCH 10/12] pass user --- eval_protocol/proxy/proxy_core/litellm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 979a4e95..e55bfa0d 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -83,8 +83,12 @@ async def handle_chat_completion( ] ) - # Build Langfuse metadata (tags) + # Build Langfuse metadata (tags + user if present) + # Convert user_id (from preprocess hook) to trace_user_id for Langfuse + user_id = metadata.pop("user_id", None) or data.get("user") litellm_metadata = {"tags": tags, **metadata} + if user_id: + litellm_metadata["trace_user_id"] = user_id langfuse_keys = config.langfuse_keys[project_id] From f4dec1741c5a9271a1424fdee4d8b6b601bdc522 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 22:51:01 -0800 Subject: [PATCH 11/12] undo langfuse --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bc417c40..511df95e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ langgraph_tools = [ proxy = [ "redis>=5.0.0", - "langfuse>=2.0.0,<3.0.0", + "langfuse>=2.0.0", "uuid6>=2025.0.0", "litellm>=1.81.0,<1.82.0", "opentelemetry-api>=1.29.0", diff --git a/uv.lock b/uv.lock index 977a9c07..3ebbcadc 100644 --- a/uv.lock +++ b/uv.lock @@ -1330,7 +1330,7 @@ requires-dist = [ { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, - { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0,<3.0.0" }, + { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" }, From 4a3cc04cb97ffe5d4753515096fce551e5155f88 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 26 Jan 2026 23:22:07 -0800 Subject: [PATCH 12/12] missing --- eval_protocol/proxy/proxy_core/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/proxy/proxy_core/models.py b/eval_protocol/proxy/proxy_core/models.py 
index bf60cf71..062a870c 100644 --- a/eval_protocol/proxy/proxy_core/models.py +++ b/eval_protocol/proxy/proxy_core/models.py @@ -72,6 +72,7 @@ class ObservationResponse(BaseModel): input: Optional[Any] = None output: Optional[Any] = None parent_observation_id: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None class TraceResponse(BaseModel):
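
A quick end-to-end sketch for anyone testing this series locally. The grounded parts come straight from the diffs: the gateway listens on port 4000, `encoded_base_url` is URL-safe base64 (patch 01 decodes it with `base64.urlsafe_b64decode`), and streaming responses are SSE lines of `data: <chunk JSON>` terminated by `data: [DONE]` (patch 02's `stream_generator`). The rest is assumed for illustration: the `/chat/completions` route path, passing the `ChatParams` fields (`rollout_id`, `encoded_base_url`) as query parameters, and the placeholder model name and API key.

```python
import base64
import json

import httpx

# Grounded: the gateway decodes this with base64.urlsafe_b64decode (patch 01).
# The upstream URL itself is just an example value.
encoded_base_url = base64.urlsafe_b64encode(
    b"https://api.fireworks.ai/inference/v1"
).decode("ascii")

payload = {
    "model": "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",  # placeholder
    "messages": [{"role": "user", "content": "Say hi"}],
    "stream": True,
}

with httpx.stream(
    "POST",
    "http://localhost:4000/chat/completions",  # route path assumed
    params={  # ChatParams field names; query-parameter transport is an assumption
        "rollout_id": "rollout-123",
        "encoded_base_url": encoded_base_url,
    },
    json=payload,
    headers={"Authorization": "Bearer <api-key>"},  # placeholder key
    timeout=300.0,  # matches the gateway's REQUEST_TIMEOUT default
) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank SSE separator lines
        body = line[len("data: "):]
        if body == "[DONE]":  # terminator emitted by stream_generator (patch 02)
            break
        chunk = json.loads(body)
        delta = chunk["choices"][0].get("delta", {}).get("content")
        if delta:
            print(delta, end="", flush=True)
```

Non-streaming calls take the same shape with `"stream"` omitted or set to `False`; the gateway then returns the `ModelResponse` as a plain JSON body.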