diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 71f7544a1c..a4ebe96d99 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -72,6 +72,397 @@ def parse_data_uri(url: str) -> "Tuple[str, str]": return mime_type, content +def get_modality_from_mime_type(mime_type: str) -> str: + """ + Infer the content modality from a MIME type string. + + Args: + mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3") + + Returns: + One of: "image", "audio", "video", or "document" + Defaults to "image" for unknown or empty MIME types. + + Examples: + "image/jpeg" -> "image" + "audio/mp3" -> "audio" + "video/mp4" -> "video" + "application/pdf" -> "document" + "text/plain" -> "document" + """ + if not mime_type: + return "image" # Default fallback + + mime_lower = mime_type.lower() + if mime_lower.startswith("image/"): + return "image" + elif mime_lower.startswith("audio/"): + return "audio" + elif mime_lower.startswith("video/"): + return "video" + elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): + return "document" + else: + return "image" # Default fallback for unknown types + + +def transform_openai_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an OpenAI/LiteLLM content part to Sentry's standardized format. + + This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs. + + Input format: + - {"type": "image_url", "image_url": {"url": "..."}} + - {"type": "image_url", "image_url": "..."} (string shorthand) + + Output format (one of): + - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from OpenAI/LiteLLM + + Returns: + A transformed dictionary in standardized format, or None if the format + is not OpenAI image_url format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type != "image_url": + return None + + image_url_data = content_part.get("image_url") + if isinstance(image_url_data, str): + url = image_url_data + elif isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + return None + + if not url: + return None + + # Check if it's a data URI (base64 encoded) + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + # Regular URL + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + +def transform_anthropic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform an Anthropic content part to Sentry's standardized format. + + This handles the Anthropic image and document formats with source dictionaries. + + Input format: + - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}} + - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}} + - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}} + - {"type": "document", "source": {...}} (same source formats) + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from Anthropic + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Anthropic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "document") or "source" not in content_part: + return None + + source = content_part.get("source") + if not isinstance(source, dict): + return None + + source_type = source.get("type") + media_type = source.get("media_type", "") + modality = ( + "document" + if block_type == "document" + else get_modality_from_mime_type(media_type) + ) + + if source_type == "base64": + return { + "type": "blob", + "modality": modality, + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": modality, + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": modality, + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + + return None + + +def transform_google_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a Google GenAI content part to Sentry's standardized format. + + This handles the Google GenAI inline_data and file_data formats. + + Input format: + - {"inline_data": {"mime_type": "...", "data": "..."}} + - {"file_data": {"mime_type": "...", "file_uri": "..."}} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + + Args: + content_part: A dictionary representing a content part from Google GenAI + + Returns: + A transformed dictionary in standardized format, or None if the format + is not Google format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + # Handle Google inline_data format + if "inline_data" in content_part: + inline_data = content_part.get("inline_data") + if isinstance(inline_data, dict): + mime_type = inline_data.get("mime_type", "") + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": inline_data.get("data", ""), + } + return None + + # Handle Google file_data format + if "file_data" in content_part: + file_data = content_part.get("file_data") + if isinstance(file_data, dict): + mime_type = file_data.get("mime_type", "") + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_data.get("file_uri", ""), + } + return None + + return None + + +def transform_generic_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a generic/LangChain-style content part to Sentry's standardized format. + + This handles generic formats where the type indicates the modality and + the data is provided via direct base64, url, or file_id fields. + + Input format: + - {"type": "image", "base64": "...", "mime_type": "..."} + - {"type": "audio", "url": "...", "mime_type": "..."} + - {"type": "video", "base64": "...", "mime_type": "..."} + - {"type": "file", "file_id": "...", "mime_type": "..."} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part in generic format + + Returns: + A transformed dictionary in standardized format, or None if the format + is not generic format or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + if block_type not in ("image", "audio", "video", "file"): + return None + + # Ensure it's not Anthropic format (which also uses type: "image") + if "source" in content_part: + return None + + mime_type = content_part.get("mime_type", "") + modality = block_type if block_type != "file" else "document" + + # Check for base64 encoded content + if "base64" in content_part: + return { + "type": "blob", + "modality": modality, + "mime_type": mime_type, + "content": content_part.get("base64", ""), + } + # Check for URL reference + elif "url" in content_part: + return { + "type": "uri", + "modality": modality, + "mime_type": mime_type, + "uri": content_part.get("url", ""), + } + # Check for file_id reference + elif "file_id" in content_part: + return { + "type": "file", + "modality": modality, + "mime_type": mime_type, + "file_id": content_part.get("file_id", ""), + } + + return None + + +def transform_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a content part from various AI SDK formats to Sentry's standardized format. + + This is a heuristic dispatcher that detects the format and delegates to the + appropriate SDK-specific transformer. For direct SDK integration, prefer using + the specific transformers directly: + - transform_openai_content_part() for OpenAI/LiteLLM + - transform_anthropic_content_part() for Anthropic + - transform_google_content_part() for Google GenAI + - transform_generic_content_part() for LangChain and other generic formats + + Detection order: + 1. OpenAI: type == "image_url" + 2. Google: "inline_data" or "file_data" keys present + 3. Anthropic: type in ("image", "document") with "source" key + 4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from an AI SDK + + Returns: + A transformed dictionary in standardized format, or None if the format + is unrecognized or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + # Try OpenAI format first (most common, clear indicator) + result = transform_openai_content_part(content_part) + if result is not None: + return result + + # Try Google format (unique keys make it easy to detect) + result = transform_google_content_part(content_part) + if result is not None: + return result + + # Try Anthropic format (has "source" key) + result = transform_anthropic_content_part(content_part) + if result is not None: + return result + + # Try generic format as fallback + result = transform_generic_content_part(content_part) + if result is not None: + return result + + # Unrecognized format + return None + + +def transform_message_content(content: "Any") -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + + For list content, each item is transformed using transform_content_part(). + Items that cannot be transformed (return None) are kept as-is. + + Args: + content: Message content - can be a string, list of content blocks, or other + + Returns: + - String content: returned as-is + - List content: list with each transformable item converted to standardized format + - Other: returned as-is + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + return transformed + + return content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 06ae8a0782..5ec079367e 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -1,3 +1,4 @@ +import copy from typing import TYPE_CHECKING import sentry_sdk @@ -7,6 +8,7 @@ get_start_span_function, set_data_normalized, truncate_and_annotate_messages, + transform_openai_content_part, ) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -14,7 +16,7 @@ from sentry_sdk.utils import event_from_exception if TYPE_CHECKING: - from typing import Any, Dict + from typing import Any, Dict, List from datetime import datetime try: @@ -36,6 +38,33 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]": return metadata +def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]": + """ + Convert the message parts from OpenAI format to the `gen_ai.request.messages` format + using the OpenAI-specific transformer (LiteLLM uses OpenAI's message format). + + Deep copies messages to avoid mutating original kwargs. + """ + # Deep copy to avoid mutating original messages from kwargs + messages = copy.deepcopy(messages) + + for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_openai_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + message["content"] = transformed + return messages + + def _input_callback(kwargs: "Dict[str, Any]") -> None: """Handle the start of a request.""" integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration) @@ -102,6 +131,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: messages = kwargs.get("messages", []) if messages: scope = sentry_sdk.get_current_scope() + messages = _convert_message_parts(messages) messages_data = truncate_and_annotate_messages(messages, span, scope) if messages_data is not None: set_data_normalized( diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 1b925fb61f..8849ab0372 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import base64 import json import pytest import time @@ -21,8 +22,10 @@ async def __call__(self, *args, **kwargs): import sentry_sdk from sentry_sdk import start_transaction from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.integrations.litellm import ( LiteLLMIntegration, + _convert_message_parts, _input_callback, _success_callback, _failure_callback, @@ -753,3 +756,241 @@ def test_litellm_message_truncation(sentry_init, capture_events): assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +IMAGE_DATA = b"fake_image_data_12345" +IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8") +IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}" + + +def test_binary_content_encoding_image_url(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Look at this image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI, "detail": "high"}, + }, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + blob_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "blob" + ), + None, + ) + assert blob_item is not None + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/png" + assert ( + IMAGE_B64 in blob_item["content"] + or blob_item["content"] == BLOB_DATA_SUBSTITUTE + ) + + +def test_binary_content_encoding_mixed_content(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Here is an image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + {"type": "text", "text": "What do you see?"}, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + content_items = [ + item for msg in messages_data if "content" in msg for item in msg["content"] + ] + assert any(item.get("type") == "text" for item in content_items) + assert any(item.get("type") == "blob" for item in content_items) + + +def test_binary_content_encoding_uri_type(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + uri_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "uri" + ), + None, + ) + assert uri_item is not None + assert uri_item["uri"] == "https://example.com/image.jpg" + + +def test_convert_message_parts_direct(): + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + blob_item = next( + item for item in converted[0]["content"] if item.get("type") == "blob" + ) + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/png" + assert IMAGE_B64 in blob_item["content"] + + +def test_convert_message_parts_does_not_mutate_original(): + """Ensure _convert_message_parts does not mutate the original messages.""" + original_url = IMAGE_DATA_URI + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": original_url}, + }, + ], + } + ] + _convert_message_parts(messages) + # Original should be unchanged + assert messages[0]["content"][0]["type"] == "image_url" + assert messages[0]["content"][0]["image_url"]["url"] == original_url + + +def test_convert_message_parts_data_url_without_base64(): + """Data URLs without ;base64, marker are still inline data and should be blobs.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "data:image/png,rawdata"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + blob_item = converted[0]["content"][0] + # Data URIs (with or without base64 encoding) contain inline data and should be blobs + assert blob_item["type"] == "blob" + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/png" + assert blob_item["content"] == "rawdata" + + +def test_convert_message_parts_image_url_none(): + """image_url being None should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": None, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url" + + +def test_convert_message_parts_image_url_missing_url(): + """image_url missing the url key should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"detail": "high"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url" diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 1ff354f473..f6852d54bb 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -19,6 +19,13 @@ _find_truncation_index, parse_data_uri, redact_blob_message_parts, + get_modality_from_mime_type, + transform_openai_content_part, + transform_anthropic_content_part, + transform_google_content_part, + transform_generic_content_part, + transform_content_part, + transform_message_content, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -842,3 +849,906 @@ def test_handles_uri_without_data_prefix(self): assert mime_type == "image/jpeg" assert content == "/9j/4AAQ" + + +class TestGetModalityFromMimeType: + def test_image_mime_types(self): + """Test that image MIME types return 'image' modality""" + assert get_modality_from_mime_type("image/jpeg") == "image" + assert get_modality_from_mime_type("image/png") == "image" + assert get_modality_from_mime_type("image/gif") == "image" + assert get_modality_from_mime_type("image/webp") == "image" + assert get_modality_from_mime_type("IMAGE/JPEG") == "image" # case insensitive + + def test_audio_mime_types(self): + """Test that audio MIME types return 'audio' modality""" + assert get_modality_from_mime_type("audio/mp3") == "audio" + assert get_modality_from_mime_type("audio/wav") == "audio" + assert get_modality_from_mime_type("audio/ogg") == "audio" + assert get_modality_from_mime_type("AUDIO/MP3") == "audio" # case insensitive + + def test_video_mime_types(self): + """Test that video MIME types return 'video' modality""" + assert get_modality_from_mime_type("video/mp4") == "video" + assert get_modality_from_mime_type("video/webm") == "video" + assert get_modality_from_mime_type("video/quicktime") == "video" + assert get_modality_from_mime_type("VIDEO/MP4") == "video" # case insensitive + + def test_document_mime_types(self): + """Test that application and text MIME types return 'document' modality""" + assert get_modality_from_mime_type("application/pdf") == "document" + assert get_modality_from_mime_type("application/json") == "document" + assert get_modality_from_mime_type("text/plain") == "document" + assert get_modality_from_mime_type("text/html") == "document" + + def test_empty_mime_type_returns_image(self): + """Test that empty MIME type defaults to 'image'""" + assert get_modality_from_mime_type("") == "image" + + def test_none_mime_type_returns_image(self): + """Test that None-like values default to 'image'""" + assert get_modality_from_mime_type(None) == "image" + + def test_unknown_mime_type_returns_image(self): + """Test that unknown MIME types default to 'image'""" + assert get_modality_from_mime_type("unknown/type") == "image" + assert get_modality_from_mime_type("custom/format") == "image" + + +class TestTransformOpenAIContentPart: + """Tests for the OpenAI-specific transform function.""" + + def test_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_openai_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + def test_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_openai_content_part(content_part) is None + + def test_non_image_url_type_returns_none(self): + """Test that non-image_url types return None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_openai_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_openai_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_openai_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_openai_content_part("string") is None + assert transform_openai_content_part(123) is None + assert transform_openai_content_part(None) is None + + +class TestTransformAnthropicContentPart: + """Tests for the Anthropic-specific transform function.""" + + def test_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_anthropic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + def test_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_anthropic_content_part(content_part) is None + + def test_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_missing_source_returns_none(self): + """Test that Anthropic format without source returns None""" + content_part = {"type": "image", "data": "something"} + assert transform_anthropic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_anthropic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_anthropic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_anthropic_content_part("string") is None + assert transform_anthropic_content_part(123) is None + assert transform_anthropic_content_part(None) is None + + +class TestTransformGoogleContentPart: + """Tests for the Google GenAI-specific transform function.""" + + def test_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_google_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_google_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_google_content_part(content_part) is None + + def test_anthropic_format_returns_none(self): + """Test that Anthropic format returns None (not handled)""" + content_part = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc"}, + } + assert transform_google_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_google_content_part("string") is None + assert transform_google_content_part(123) is None + assert transform_google_content_part(None) is None + + +class TestTransformGenericContentPart: + """Tests for the generic/LangChain-style transform function.""" + + def test_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_generic_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + def test_image_with_source_returns_none(self): + """Test that image with source key (Anthropic style) returns None""" + # This is Anthropic format, should NOT be handled by generic + content_part = { + "type": "image", + "source": {"type": "base64", "data": "abc"}, + } + assert transform_generic_content_part(content_part) is None + + def test_text_type_returns_none(self): + """Test that text type returns None""" + content_part = {"type": "text", "text": "Hello"} + assert transform_generic_content_part(content_part) is None + + def test_openai_format_returns_none(self): + """Test that OpenAI format returns None (not handled)""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com"}, + } + assert transform_generic_content_part(content_part) is None + + def test_google_format_returns_none(self): + """Test that Google format returns None (not handled)""" + content_part = {"inline_data": {"mime_type": "image/jpeg", "data": "abc"}} + assert transform_generic_content_part(content_part) is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_generic_content_part("string") is None + assert transform_generic_content_part(123) is None + assert transform_generic_content_part(None) is None + + def test_missing_data_key_returns_none(self): + """Test that missing data key (base64/url/file_id) returns None""" + content_part = {"type": "image", "mime_type": "image/jpeg"} + assert transform_generic_content_part(content_part) is None + + +class TestTransformContentPart: + # OpenAI/LiteLLM format tests + def test_openai_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_openai_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + # Anthropic format tests + def test_anthropic_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_anthropic_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_anthropic_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_anthropic_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_anthropic_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + # Google format tests + def test_google_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_google_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_google_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + # Generic format tests (LangChain style) + def test_generic_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_generic_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_generic_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_generic_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + # Edge cases and error handling + def test_text_block_returns_none(self): + """Test that text blocks return None (not transformed)""" + content_part = {"type": "text", "text": "Hello world"} + result = transform_content_part(content_part) + + assert result is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_content_part("string") is None + assert transform_content_part(123) is None + assert transform_content_part(None) is None + assert transform_content_part([1, 2, 3]) is None + + def test_empty_dict_returns_none(self): + """Test that empty dict returns None""" + assert transform_content_part({}) is None + + def test_unknown_type_returns_none(self): + """Test that unknown type returns None""" + content_part = {"type": "unknown", "data": "something"} + assert transform_content_part(content_part) is None + + def test_openai_image_url_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_content_part(content_part) is None + + def test_anthropic_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_anthropic_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_content_part(content_part) is None + + def test_google_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_google_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + +class TestTransformMessageContent: + def test_string_content_returned_as_is(self): + """Test that string content is returned unchanged""" + content = "Hello, world!" + result = transform_message_content(content) + + assert result == "Hello, world!" + + def test_list_with_transformable_items(self): + """Test transforming a list with transformable content parts""" + content = [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ"}, + }, + ] + result = transform_message_content(content) + + assert len(result) == 2 + # Text block should be unchanged (transform returns None, so original kept) + assert result[0] == {"type": "text", "text": "What's in this image?"} + # Image should be transformed + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + + def test_list_with_non_dict_items(self): + """Test that non-dict items in list are kept as-is""" + content = ["text string", 123, {"type": "text", "text": "hi"}] + result = transform_message_content(content) + + assert result == ["text string", 123, {"type": "text", "text": "hi"}] + + def test_tuple_content(self): + """Test that tuple content is also handled""" + content = ( + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/img.jpg"}, + }, + ) + result = transform_message_content(content) + + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "Hello"} + assert result[1] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/img.jpg", + } + + def test_other_types_returned_as_is(self): + """Test that other types are returned unchanged""" + assert transform_message_content(123) == 123 + assert transform_message_content(None) is None + assert transform_message_content({"key": "value"}) == {"key": "value"} + + def test_mixed_content_types(self): + """Test transforming mixed content with multiple formats""" + content = [ + {"type": "text", "text": "Look at these:"}, + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,iVBORw0"}, + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQ", + }, + }, + {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}}, + ] + result = transform_message_content(content) + + assert len(result) == 4 + assert result[0] == {"type": "text", "text": "Look at these:"} + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0", + } + assert result[2] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + assert result[3] == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_empty_list(self): + """Test that empty list is returned as empty list""" + assert transform_message_content([]) == []