13 changes: 9 additions & 4 deletions eval_protocol/adapters/fireworks_tracing.py
@@ -46,7 +46,7 @@ def __call__(
...


def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.

Args:
@@ -137,9 +137,14 @@ def convert_trace_dict_to_evaluation_row(

observations = trace.get("observations") or []
# We can only extract when stored in OTEL format.
openai_response = extract_openai_response(observations)
if openai_response:
choices = openai_response.get("llm.openai.choices")
otel_attributes = extract_otel_attributes(observations)
if otel_attributes:
# Find choices from any provider (llm.*.choices pattern)
choices = None
for key, value in otel_attributes.items():
if key.endswith(".choices") and isinstance(value, list):
choices = value
break
if choices and len(choices) > 0:
execution_metadata.finish_reason = choices[0].get("finish_reason")

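A minimal sketch of the provider-agnostic scan this change introduces, using hypothetical attribute values: any `llm.<provider>.choices` key is accepted, and `finish_reason` is read from the first choice.

```python
# Hypothetical attributes, roughly what extract_otel_attributes returns when the
# observation was stored in OTEL format; the key prefix depends on the provider.
otel_attributes = {
    "llm.openai.choices": [
        {"index": 0, "finish_reason": "stop", "message": {"role": "assistant", "content": "hi"}},
    ],
}

finish_reason = None
for key, value in otel_attributes.items():
    # llm.*.choices pattern: matches llm.openai.choices, llm.anthropic.choices, ...
    if key.endswith(".choices") and isinstance(value, list) and value:
        finish_reason = value[0].get("finish_reason")
        break

print(finish_reason)  # "stop"
```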
18 changes: 10 additions & 8 deletions eval_protocol/proxy/Dockerfile.gateway
@@ -1,23 +1,25 @@
# Metadata Extraction Gateway - Sits in front of LiteLLM
# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
FROM python:3.11-slim

WORKDIR /app

# Prevent Python from buffering stdout/stderr
ENV PYTHONUNBUFFERED=1

# Copy requirements file
COPY ./requirements.txt /app/requirements.txt
# Copy the entire package for local install (context is repo root)
COPY pyproject.toml /app/pyproject.toml
COPY eval_protocol /app/eval_protocol
COPY README.md /app/README.md

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Install from local source with proxy extras
RUN pip install --no-cache-dir ".[proxy]"

# Copy the proxy package
COPY ./proxy_core /app/proxy_core
# Copy the proxy package (local overrides for main.py, auth.py, etc.)
COPY eval_protocol/proxy/proxy_core /app/proxy_core

# Expose port
EXPOSE 4000

# Run the gateway as a module
# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
CMD ["python", "-m", "proxy_core.main"]
18 changes: 9 additions & 9 deletions eval_protocol/proxy/README.md
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
- Stores insertion IDs per rollout for completeness checking
- Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`

#### 3. **LiteLLM Backend**
- Standard LiteLLM proxy for routing to LLM providers
- Configured with Langfuse callbacks for automatic tracing
#### 3. **LiteLLM SDK (Direct)**
- Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
- Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
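As a rough illustration of this direct-SDK path (placeholder credentials; the gateway wires these up at startup from its secrets file):

```python
import os
import litellm

# Langfuse credentials for the OTEL exporter (placeholders, not real keys).
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ.setdefault("LANGFUSE_HOST", "https://us.cloud.langfuse.com")

# Send traces through the OpenTelemetry-based Langfuse integration.
litellm.callbacks = ["langfuse_otel"]

response = litellm.completion(
    model="openai/gpt-4o-mini",  # any provider/model string litellm can route
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```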

## Key Features

@@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection.

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
| `REDIS_HOST` | Yes | - | Redis hostname |
| `REDIS_PORT` | No | 6379 | Redis port |
| `REDIS_PASSWORD` | No | - | Redis password |
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
| `LANGFUSE_HOST` | No | `https://us.cloud.langfuse.com` | Langfuse OTEL host for tracing |
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
| `LOG_LEVEL` | No | INFO | Logging level |
| `PORT` | No | 4000 | Gateway port |
@@ -272,25 +271,26 @@ default_project_id: project-1

### LiteLLM Configuration

The `config_no_cache.yaml` configures LiteLLM:
The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
```yaml
model_list:
- model_name: "*"
litellm_params:
model: "*"
litellm_settings:
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
callbacks: ["langfuse_otel"]
drop_params: True
general_settings:
allow_client_side_credentials: true
```

Key settings:
- **Wildcard model support**: Route any model to any provider
- **Langfuse callbacks**: Automatic tracing on success/failure
- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
- **Client-side credentials**: Accept API keys from request body

**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.

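For the direct-SDK path, a rough module-level equivalent of the YAML settings above (assuming `litellm.drop_params` as the SDK-side counterpart of `drop_params`):

```python
import litellm

# SDK-side equivalents of the standalone-proxy YAML settings.
litellm.callbacks = ["langfuse_otel"]  # OTEL-based Langfuse tracing
litellm.drop_params = True             # drop params a provider does not support
```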
## Security Considerations

### Authentication
3 changes: 1 addition & 2 deletions eval_protocol/proxy/config_no_cache.yaml
@@ -3,8 +3,7 @@ model_list:
litellm_params:
model: "*"
litellm_settings:
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
callbacks: ["langfuse_otel"]
drop_params: True
general_settings:
allow_client_side_credentials: true
35 changes: 6 additions & 29 deletions eval_protocol/proxy/docker-compose.yml
@@ -7,41 +7,19 @@ services:
ports:
- "6379:6379" # Expose for debugging if needed
networks:
- litellm-network
- proxy-network
restart: unless-stopped
command: redis-server --appendonly yes
volumes:
- redis-data:/data

# LiteLLM Backend - Handles actual LLM proxying
litellm-backend:
image: litellm/litellm:v1.77.3-stable
platform: linux/amd64
container_name: litellm-backend
command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
# If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in .env file.
env_file:
- .env # Load API keys from .env file
environment:
- LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy
- LANGFUSE_SECRET_KEY=dummy
volumes:
- ./config_no_cache.yaml:/app/config.yaml:ro
ports:
- "4001:4000" # Expose on 4001 for direct access if needed
networks:
- litellm-network
restart: unless-stopped

# Metadata Gateway - Public-facing service that extracts metadata from URLs
# Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
metadata-gateway:
build:
context: .
dockerfile: Dockerfile.gateway
context: ../..
dockerfile: eval_protocol/proxy/Dockerfile.gateway
container_name: metadata-gateway
environment:
# Point to the LiteLLM backend service
- LITELLM_URL=http://litellm-backend:4000
- PORT=4000
# Redis configuration for assistant message counting
- REDIS_HOST=redis
@@ -56,14 +34,13 @@
ports:
- "4000:4000" # Main public-facing port
networks:
- litellm-network
- proxy-network
depends_on:
- litellm-backend
- redis
restart: unless-stopped

networks:
litellm-network:
proxy-network:
driver: bridge

volumes:
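Once the compose stack is up, a quick smoke test, assuming the gateway's health handler (see the app.py diff below) is served at `/health` on the published port:

```python
import requests  # pip install requests

# Port 4000 is the main public-facing port mapped above.
resp = requests.get("http://localhost:4000/health", timeout=5)
print(resp.status_code, resp.json())
# expected: 200 {'status': 'healthy', 'service': 'metadata-proxy'}
```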
26 changes: 11 additions & 15 deletions eval_protocol/proxy/proxy_core/app.py
@@ -15,7 +15,7 @@

from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
from .auth import AuthProvider, NoAuthProvider
from .litellm import handle_chat_completion, proxy_to_litellm
from .litellm import handle_chat_completion
from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace

# Configure logging before any other imports (so all modules inherit this config)
@@ -35,10 +35,6 @@ def build_proxy_config(
preprocess_traces_request: Optional[TracesRequestHook] = None,
) -> ProxyConfig:
"""Load environment and secrets, and build ProxyConfig"""
# Env
litellm_url = os.getenv("LITELLM_URL")
if not litellm_url:
raise ValueError("LITELLM_URL environment variable must be set")
request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

@@ -66,7 +62,6 @@
raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")

return ProxyConfig(
litellm_url=litellm_url,
request_timeout=request_timeout,
langfuse_host=langfuse_host,
langfuse_keys=langfuse_keys,
@@ -113,6 +108,16 @@ async def lifespan(app: FastAPI):
app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
app.state.redis = init_redis()

config = app.state.config
default_keys = config.langfuse_keys[config.default_project_id]
os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)

import litellm

litellm.callbacks = ["langfuse_otel"]

try:
yield
finally:
@@ -297,13 +302,4 @@ async def pointwise_get_langfuse_trace(
async def health():
return {"status": "healthy", "service": "metadata-proxy"}

# Catch-all
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
async def catch_all_proxy(
path: str,
request: Request,
config: ProxyConfig = Depends(get_config),
):
return await proxy_to_litellm(config, path, request)

return app