Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion eval_protocol/adapters/langfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@ def __call__(


try:
from langfuse import get_client # pyright: ignore[reportPrivateImportUsage]
from langfuse import Langfuse

LANGFUSE_AVAILABLE = True

def get_client():
"""Compatibility shim for langfuse 2.x (returns Langfuse instance)."""
return Langfuse()
except ImportError:
LANGFUSE_AVAILABLE = False

Expand Down
4 changes: 2 additions & 2 deletions tests/chinook/langfuse/generate_traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from tests.chinook.dataset import collect_dataset

try:
from langfuse import get_client, observe # pyright: ignore[reportPrivateImportUsage]
from langfuse import Langfuse, observe
from pydantic_ai.agent import Agent
from pydantic_ai.models.openai import OpenAIChatModel

LANGFUSE_AVAILABLE = True
langfuse_client = get_client()
langfuse_client = Langfuse()

Agent.instrument_all()

Expand Down
4 changes: 2 additions & 2 deletions tests/chinook/langfuse/test_langfuse_chinook.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@

# Langfuse client setup
try:
from langfuse import get_client # pyright: ignore[reportPrivateImportUsage]
from langfuse import Langfuse

LANGFUSE_AVAILABLE = True
langfuse = get_client()
langfuse = Langfuse()
except ImportError:
LANGFUSE_AVAILABLE = False
langfuse = None
Expand Down
3 changes: 3 additions & 0 deletions tests/test_adapters_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def test_langfuse_conversation_analysis(self):
class TestHuggingFaceAdapterE2E:
"""End-to-end tests for HuggingFace adapter with real datasets."""

@pytest.mark.skip(reason="gsm8k dataset no longer available on HuggingFace Hub")
def test_gsm8k_adapter_real_data(self):
"""Test loading real GSM8K data and converting to EvaluationRow."""
try:
Expand Down Expand Up @@ -318,6 +319,7 @@ def math_transform(row: Dict[str, Any]) -> Dict[str, Any]:

print(f" Row {i}: Type={dataset_info.get('type')}, Level={dataset_info.get('level')}")

@pytest.mark.skip(reason="squad dataset no longer available on HuggingFace Hub")
def test_custom_dataset_transform(self):
"""Test adapter with a completely custom transformation."""
try:
Expand Down Expand Up @@ -663,6 +665,7 @@ def google_books_transform(row: Dict[str, Any]) -> Dict[str, Any]:
assert doc_freq > 5, f"Row {i} should have document frequency > 5"


@pytest.mark.skip(reason="gsm8k dataset no longer available on HuggingFace Hub")
def test_adapters_integration():
"""Test that adapters work with evaluation pipeline."""
print("Testing adapter integration with evaluation pipeline...")
Expand Down
Loading