13 changes: 9 additions & 4 deletions eval_protocol/adapters/fireworks_tracing.py
@@ -46,7 +46,7 @@ def __call__(
...


def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.

Args:
@@ -137,9 +137,14 @@ def convert_trace_dict_to_evaluation_row(

observations = trace.get("observations") or []
# We can only extract when stored in OTEL format.
openai_response = extract_openai_response(observations)
if openai_response:
choices = openai_response.get("llm.openai.choices")
otel_attributes = extract_otel_attributes(observations)
if otel_attributes:
# Find choices from any provider (llm.*.choices pattern)
choices = None
for key, value in otel_attributes.items():
if key.endswith(".choices") and isinstance(value, list):
choices = value
break
if choices and len(choices) > 0:
execution_metadata.finish_reason = choices[0].get("finish_reason")

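A minimal sketch of the provider-agnostic scan this change introduces, using hypothetical attribute values: any `llm.<provider>.choices` key is accepted, and `finish_reason` is read from the first choice.

```python
# Hypothetical attributes, roughly what extract_otel_attributes returns when the
# observation was stored in OTEL format; the key prefix depends on the provider.
otel_attributes = {
    "llm.openai.choices": [
        {"index": 0, "finish_reason": "stop", "message": {"role": "assistant", "content": "hi"}},
    ],
}

finish_reason = None
for key, value in otel_attributes.items():
    # llm.*.choices pattern: matches llm.openai.choices, llm.anthropic.choices, ...
    if key.endswith(".choices") and isinstance(value, list) and value:
        finish_reason = value[0].get("finish_reason")
        break

print(finish_reason)  # "stop"
```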
18 changes: 10 additions & 8 deletions eval_protocol/proxy/Dockerfile.gateway
@@ -1,23 +1,25 @@
# Metadata Extraction Gateway - Sits in front of LiteLLM
# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
FROM python:3.11-slim

WORKDIR /app

# Prevent Python from buffering stdout/stderr
ENV PYTHONUNBUFFERED=1

# Copy requirements file
COPY ./requirements.txt /app/requirements.txt
# Copy the entire package for local install (context is repo root)
COPY pyproject.toml /app/pyproject.toml
COPY eval_protocol /app/eval_protocol
COPY README.md /app/README.md

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Install from local source with proxy extras
RUN pip install --no-cache-dir ".[proxy]"

# Copy the proxy package
COPY ./proxy_core /app/proxy_core
# Copy the proxy package (local overrides for main.py, auth.py, etc.)
COPY eval_protocol/proxy/proxy_core /app/proxy_core

# Expose port
EXPOSE 4000

# Run the gateway as a module
# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
CMD ["python", "-m", "proxy_core.main"]
18 changes: 9 additions & 9 deletions eval_protocol/proxy/README.md
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
- Stores insertion IDs per rollout for completeness checking
- Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`

#### 3. **LiteLLM Backend**
- Standard LiteLLM proxy for routing to LLM providers
- Configured with Langfuse callbacks for automatic tracing
#### 3. **LiteLLM SDK (Direct)**
- Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
- Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
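As a rough illustration of this direct-SDK path (placeholder credentials; the gateway wires these up at startup from its secrets file):

```python
import os
import litellm

# Langfuse credentials for the OTEL exporter (placeholders, not real keys).
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ.setdefault("LANGFUSE_HOST", "https://us.cloud.langfuse.com")

# Send traces through the OpenTelemetry-based Langfuse integration.
litellm.callbacks = ["langfuse_otel"]

response = litellm.completion(
    model="openai/gpt-4o-mini",  # any provider/model string litellm can route
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```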

## Key Features

@@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection.

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
| `REDIS_HOST` | Yes | - | Redis hostname |
| `REDIS_PORT` | No | 6379 | Redis port |
| `REDIS_PASSWORD` | No | - | Redis password |
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
| `LANGFUSE_HOST` | No | `https://us.cloud.langfuse.com` | Langfuse OTEL host for tracing |
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
| `LOG_LEVEL` | No | INFO | Logging level |
| `PORT` | No | 4000 | Gateway port |
@@ -272,25 +271,26 @@ default_project_id: project-1

### LiteLLM Configuration

The `config_no_cache.yaml` configures LiteLLM:
The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
```yaml
model_list:
- model_name: "*"
litellm_params:
model: "*"
litellm_settings:
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
callbacks: ["langfuse_otel"]
drop_params: True
general_settings:
allow_client_side_credentials: true
```

Key settings:
- **Wildcard model support**: Route any model to any provider
- **Langfuse callbacks**: Automatic tracing on success/failure
- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
- **Client-side credentials**: Accept API keys from request body

**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.

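For the direct-SDK path, a rough module-level equivalent of the YAML settings above (assuming `litellm.drop_params` as the SDK-side counterpart of `drop_params`):

```python
import litellm

# SDK-side equivalents of the standalone-proxy YAML settings.
litellm.callbacks = ["langfuse_otel"]  # OTEL-based Langfuse tracing
litellm.drop_params = True             # drop params a provider does not support
```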
## Security Considerations

### Authentication
3 changes: 1 addition & 2 deletions eval_protocol/proxy/config_no_cache.yaml
@@ -3,8 +3,7 @@ model_list:
litellm_params:
model: "*"
litellm_settings:
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
callbacks: ["langfuse_otel"]
drop_params: True
general_settings:
allow_client_side_credentials: true
35 changes: 6 additions & 29 deletions eval_protocol/proxy/docker-compose.yml
@@ -7,41 +7,19 @@ services:
ports:
- "6379:6379" # Expose for debugging if needed
networks:
- litellm-network
- proxy-network
restart: unless-stopped
command: redis-server --appendonly yes
volumes:
- redis-data:/data

# LiteLLM Backend - Handles actual LLM proxying
litellm-backend:
image: litellm/litellm:v1.77.3-stable
platform: linux/amd64
container_name: litellm-backend
command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
# If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in .env file.
env_file:
- .env # Load API keys from .env file
environment:
- LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy
- LANGFUSE_SECRET_KEY=dummy
volumes:
- ./config_no_cache.yaml:/app/config.yaml:ro
ports:
- "4001:4000" # Expose on 4001 for direct access if needed
networks:
- litellm-network
restart: unless-stopped

# Metadata Gateway - Public-facing service that extracts metadata from URLs
# Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
metadata-gateway:
build:
context: .
dockerfile: Dockerfile.gateway
context: ../..
dockerfile: eval_protocol/proxy/Dockerfile.gateway
container_name: metadata-gateway
environment:
# Point to the LiteLLM backend service
- LITELLM_URL=http://litellm-backend:4000
- PORT=4000
# Redis configuration for assistant message counting
- REDIS_HOST=redis
@@ -56,14 +34,13 @@
ports:
- "4000:4000" # Main public-facing port
networks:
- litellm-network
- proxy-network
depends_on:
- litellm-backend
- redis
restart: unless-stopped

networks:
litellm-network:
proxy-network:
driver: bridge

volumes:
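Once the compose stack is up, a quick smoke test, assuming the gateway's health handler (see the app.py diff below) is served at `/health` on the published port:

```python
import requests  # pip install requests

# Port 4000 is the main public-facing port mapped above.
resp = requests.get("http://localhost:4000/health", timeout=5)
print(resp.status_code, resp.json())
# expected: 200 {'status': 'healthy', 'service': 'metadata-proxy'}
```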
26 changes: 11 additions & 15 deletions eval_protocol/proxy/proxy_core/app.py
@@ -15,7 +15,7 @@

from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
from .auth import AuthProvider, NoAuthProvider
from .litellm import handle_chat_completion, proxy_to_litellm
from .litellm import handle_chat_completion
from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace

# Configure logging before any other imports (so all modules inherit this config)
@@ -35,10 +35,6 @@ def build_proxy_config(
preprocess_traces_request: Optional[TracesRequestHook] = None,
) -> ProxyConfig:
"""Load environment and secrets, and build ProxyConfig"""
# Env
litellm_url = os.getenv("LITELLM_URL")
if not litellm_url:
raise ValueError("LITELLM_URL environment variable must be set")
request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

@@ -66,7 +62,6 @@
raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")

return ProxyConfig(
litellm_url=litellm_url,
request_timeout=request_timeout,
langfuse_host=langfuse_host,
langfuse_keys=langfuse_keys,
@@ -113,6 +108,16 @@ async def lifespan(app: FastAPI):
app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
app.state.redis = init_redis()

config = app.state.config
default_keys = config.langfuse_keys[config.default_project_id]
os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)

import litellm

litellm.callbacks = ["langfuse_otel"]

try:
yield
finally:
@@ -297,13 +302,4 @@ async def pointwise_get_langfuse_trace(
async def health():
return {"status": "healthy", "service": "metadata-proxy"}

# Catch-all
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
async def catch_all_proxy(
path: str,
request: Request,
config: ProxyConfig = Depends(get_config),
):
return await proxy_to_litellm(config, path, request)

return app