diff --git a/cubejs/__init__.py b/cubejs/__init__.py index 85a182e..3583c3f 100644 --- a/cubejs/__init__.py +++ b/cubejs/__init__.py @@ -7,6 +7,10 @@ CubeJSRequest, CubeJSResponse, Filter, + FilterOperators, + Granularity, + LogicalOperator, + OrderBy, TimeDimension, ) @@ -18,4 +22,8 @@ "CubeJSResponse", "TimeDimension", "Filter", + "OrderBy", + "Granularity", + "FilterOperators", + "LogicalOperator", ] diff --git a/cubejs/model.py b/cubejs/model.py index af9c4f0..52db80f 100644 --- a/cubejs/model.py +++ b/cubejs/model.py @@ -1,23 +1,133 @@ """Data model.""" -from pydantic import BaseModel, Field +from enum import Enum + +from pydantic import BaseModel, Field, model_validator + + +class OrderBy(str, Enum): + """CubeJS order by available options. + + If the order property is not specified in the query, Cube sorts results by default: + 1. First time dimension with granularity (ascending) + 2. If no time dimension exists, first measure (descending) + 3. If no measure exists, first dimension (ascending) + + The order can be specified either as a dict mapping fields to ASC/DESC, + or as an array of tuples for controlling the ordering sequence. + """ + + ASC = "asc" + DESC = "desc" + + +class Granularity(str, Enum): + """CubeJS granularity available for time dimensions. + + Time-based properties are modeled using dimensions of the time type. They allow + grouping the result set by a unit of time (e.g., days, weeks, months, etc.), also + known as the time dimension granularity. + + The following granularities are available by default for any time dimension: + """ + + YEAR = "year" + QUARTER = "quarter" + MONTH = "month" + WEEK = "week" + DAY = "day" + HOUR = "hour" + MINUTE = "minute" + SECOND = "second" + + +class FilterOperators(str, Enum): + """CubeJS available filter operations. + + Different operators are available depending on whether they're applied to measures + or dimensions, and for dimensions, the available operators depend on the dimension + type.
+ + Operators for measures: + - equals, notEquals: Exact match or its opposite. Supports multiple values. + - gt, gte, lt, lte: Greater than, greater than or equal, less than, + less than or equal. + - set, notSet: Checks if value is not NULL or is NULL respectively. + - measureFilter: Applies an existing measure's filters to the current query. + + Operators for dimensions (availability depends on dimension type): + - string: equals, notEquals, contains, notContains, startsWith, notStartsWith, + endsWith, notEndsWith, set, notSet + - number: equals, notEquals, gt, gte, lt, lte, set, notSet + - time: equals, notEquals, inDateRange, notInDateRange, beforeDate, afterDate, + set, notSet + """ + + EQUALS = "equals" + NOT_EQUALS = "notEquals" + CONTAINS = "contains" + NOT_CONTAINS = "notContains" + STARTS_WITH = "startsWith" + NOT_STARTS_WITH = "notStartsWith" + ENDS_WITH = "endsWith" + NOT_ENDS_WITH = "notEndsWith" + GREATER_THAN = "gt" + GREATER_THAN_OR_EQUAL = "gte" + LESS_THAN = "lt" + LESS_THAN_OR_EQUAL = "lte" + SET = "set" + NOT_SET = "notSet" + IN_DATE_RANGE = "inDateRange" + NOT_IN_DATE_RANGE = "notInDateRange" + BEFORE_DATE = "beforeDate" + AFTER_DATE = "afterDate" + MEASURE_FILTER = "measureFilter" class TimeDimension(BaseModel): - """Time dimension section of a cubejs request. + """Time dimension filters and grouping. - Args: - dimension: column name to use as time reference. - granularity: granularity to transform the timestamp. - date_range: date range to filter the query. + Provides a convenient shortcut to pass a dimension and filter as a TimeDimension. + Args: + dimension: Time dimension name to use for filtering and/or grouping. + granularity: A granularity for the time dimension. Can be one of the default + granularities (e.g., year, week, day) or a custom granularity. If not + provided, Cube will only filter by the time dimension without grouping. + date_range: Date range for filtering. 
Can be: + - An array of dates in YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss.SSS format + - A single date (equivalent to passing two identical dates) + - A string with a relative date range (e.g., "last quarter") + Values should be local and in query timezone. YYYY-MM-DD dates are padded + to start/end of day when used as range boundaries. + compare_date_range: An array of date ranges to compare measure values across + different time periods. """ dimension: str - granularity: str | None = None + granularity: Granularity | None = None date_range: list[str] | str | None = Field( default=None, serialization_alias="dateRange" ) + compare_date_range: list[list[str] | str] | None = Field( + default=None, serialization_alias="compareDateRange" + ) + + @model_validator(mode="after") + def validate_date_ranges(self) -> "TimeDimension": + """Validate date range configurations.""" + if self.date_range is not None and self.compare_date_range is not None: + raise ValueError("Cannot provide both date_range and compare_date_range") + + if self.compare_date_range is not None: + for date_range in self.compare_date_range: + if isinstance(date_range, list) and len(date_range) != 2: + raise ValueError( + "Each compare_date_range entry must contain exactly 2 " + "dates when provided" + ) + + return self class Config: # noqa: D106 exclude_none = True @@ -27,16 +137,52 @@ class Config: # noqa: D106 class Filter(BaseModel): """Filter section of a cubejs request. - Args: - member: member to filter by. - operator: operator to apply. - values: values to filter by. + Filters can be applied to dimensions or measures: + - When filtering dimensions, raw data is restricted before calculations + - When filtering measures, results are restricted after measure calculation + Args: + member: Dimension or measure to filter by (e.g., "stories.isDraft"). + operator: Operator to apply to the filter. Available operators depend on + whether filtering a dimension or measure, and the type of dimension. 
+ See FilterOperators for available options. + values: Array of values for the filter. Values must be strings. + For dates, use YYYY-MM-DD format. Optional for some operators + like 'set' and 'notSet'. """ member: str operator: str - values: list[str] + values: list[str] | None = None + + +class LogicalOperator(BaseModel): + """Logical operator for combining filters. + + Allows combining multiple filters with boolean logic. You can use either 'or_' or + 'and_' to create complex filter conditions. + + Note: + - You cannot mix dimension and measure filters in the same logical operator + - Dimension filters apply to raw data (WHERE clause in SQL) + - Measure filters apply to aggregated data (HAVING clause in SQL) + + Args: + or_: List of filters or other logical operators to combine with OR. + and_: List of filters or other logical operators to combine with AND. + """ + + or_: list["FilterOrLogical"] | None = Field(default=None, serialization_alias="or") + and_: list["FilterOrLogical"] | None = Field( + default=None, serialization_alias="and" + ) + + class Config: # noqa: D106 + exclude_none = True + populate_by_name = True + + +FilterOrLogical = Filter | LogicalOperator class CubeJSRequest(BaseModel): @@ -47,21 +193,23 @@ class CubeJSRequest(BaseModel): time_dimensions: time dimensions to aggregate measures by. dimensions: dimensions to group by. segments: segments to filter by. - filters: other filters to apply. + filters: other filters to apply (can include logical operators). order: order records in response by. limit: limit the number of records in response. + offset: number of records to skip in response. 
""" - measures: list[str] + measures: list[str] = Field(default_factory=list) time_dimensions: list[TimeDimension] | None = Field( serialization_alias="timeDimensions", default=None ) dimensions: list[str] | None = None segments: list[str] | None = None - filters: list[Filter] | None = None - order: dict[str, str] | None = None + filters: list[FilterOrLogical] = Field(default_factory=list) + order: dict[str, OrderBy] | None = None limit: int | None = None + offset: int | None = None class CubeJSAuth(BaseModel): diff --git a/tests/test_client.py b/tests/test_client.py index eb0887e..7d7c1ef 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -12,6 +12,7 @@ get_measures, ) from cubejs.client import _error_handler +from cubejs.model import FilterOperators, Granularity, OrderBy @pytest.mark.asyncio @@ -49,7 +50,7 @@ async def test_get_metrics(httpx_mock): time_dimensions=[ TimeDimension( dimension="orders.created_at", - granularity="day", + granularity=Granularity.DAY, date_range="last 30 days", ) ], @@ -57,11 +58,11 @@ async def test_get_metrics(httpx_mock): filters=[ Filter( member="orders.status", - operator="equals", + operator=FilterOperators.EQUALS, values=["completed", "processing"], ) ], - order={"orders.count": "desc"}, + order={"orders.count": OrderBy.DESC}, ), ) diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..9d6d3a2 --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,289 @@ +"""Tests for the CubeJS data model.""" + +import pytest +from pydantic import ValidationError + +from cubejs import ( + CubeJSRequest, + Filter, + FilterOperators, + Granularity, + LogicalOperator, + OrderBy, + TimeDimension, +) + + +class TestTimeDimension: + """Test suite for TimeDimension model.""" + + def test_valid_time_dimension(self): + """Test creating a valid time dimension.""" + time_dim = TimeDimension( + dimension="orders.created_at", + granularity=Granularity.MONTH, + date_range=["2023-01-01", "2023-12-31"], + ) + assert 
time_dim.dimension == "orders.created_at" + assert time_dim.granularity == Granularity.MONTH + assert time_dim.date_range == ["2023-01-01", "2023-12-31"] + assert time_dim.compare_date_range is None + + def test_time_dimension_with_relative_date(self): + """Test time dimension with relative date string.""" + time_dim = TimeDimension( + dimension="orders.created_at", + granularity=Granularity.DAY, + date_range="last week", + ) + assert time_dim.date_range == "last week" + + def test_time_dimension_with_compare_date_range(self): + """Test time dimension with compare date range.""" + time_dim = TimeDimension( + dimension="orders.created_at", + granularity=Granularity.MONTH, + compare_date_range=[ + ["2023-01-01", "2023-03-31"], + ["2022-01-01", "2022-03-31"], + ], + ) + assert time_dim.compare_date_range == [ + ["2023-01-01", "2023-03-31"], + ["2022-01-01", "2022-03-31"], + ] + assert time_dim.date_range is None + + def test_time_dimension_with_mixed_compare_date_range(self): + """Test time dimension with mixed format compare date range.""" + time_dim = TimeDimension( + dimension="orders.created_at", + granularity=Granularity.MONTH, + compare_date_range=[["2023-01-01", "2023-03-31"], "last quarter"], + ) + assert time_dim.compare_date_range == [ + ["2023-01-01", "2023-03-31"], + "last quarter", + ] + + def test_invalid_both_date_ranges(self): + """Test that providing both date_range and compare_date_range is an error.""" + with pytest.raises(ValidationError) as exc_info: + TimeDimension( + dimension="orders.created_at", + date_range=["2023-01-01", "2023-12-31"], + compare_date_range=[["2022-01-01", "2022-12-31"]], + ) + assert "Cannot provide both date_range and compare_date_range" in str( + exc_info.value + ) + + def test_invalid_compare_date_range_length(self): + """Test that compare_date_range entries must have exactly 2 dates when lists.""" + with pytest.raises(ValidationError) as exc_info: + TimeDimension( + dimension="orders.created_at", + 
compare_date_range=[["2023-01-01", "2023-03-31", "2023-06-30"]], + ) + assert "Each compare_date_range entry must contain exactly 2 dates" in str( + exc_info.value + ) + + +class TestFilter: + """Test suite for Filter model.""" + + def test_equals_filter(self): + """Test creating an equals filter.""" + filter_obj = Filter( + member="products.category", + operator=FilterOperators.EQUALS, + values=["Electronics"], + ) + assert filter_obj.member == "products.category" + assert filter_obj.operator == "equals" + assert filter_obj.values == ["Electronics"] + + def test_not_equals_filter(self): + """Test creating a not equals filter.""" + filter_obj = Filter( + member="products.category", + operator=FilterOperators.NOT_EQUALS, + values=["Clothing"], + ) + assert filter_obj.operator == "notEquals" + assert filter_obj.values == ["Clothing"] + + def test_contains_filter(self): + """Test creating a contains filter.""" + filter_obj = Filter( + member="products.name", + operator=FilterOperators.CONTAINS, + values=["iPhone"], + ) + assert filter_obj.operator == "contains" + + def test_in_date_range_filter(self): + """Test creating an in date range filter.""" + filter_obj = Filter( + member="orders.created_at", + operator=FilterOperators.IN_DATE_RANGE, + values=["2023-01-01", "2023-12-31"], + ) + assert filter_obj.operator == "inDateRange" + assert filter_obj.values == ["2023-01-01", "2023-12-31"] + + def test_set_filter_without_values(self): + """Test creating a set filter without values.""" + filter_obj = Filter( + member="products.description", + operator=FilterOperators.SET, + ) + assert filter_obj.operator == "set" + assert filter_obj.values is None + + +class TestLogicalOperator: + """Test suite for LogicalOperator model.""" + + def test_or_operator(self): + """Test creating an OR logical operator.""" + logical_op = LogicalOperator( + or_=[ + Filter( + member="products.category", + operator=FilterOperators.EQUALS, + values=["Electronics"], + ), + Filter( + 
member="products.category", + operator=FilterOperators.EQUALS, + values=["Computers"], + ), + ] + ) + assert len(logical_op.or_) == 2 + assert logical_op.and_ is None + + def test_and_operator(self): + """Test creating an AND logical operator.""" + logical_op = LogicalOperator( + and_=[ + Filter( + member="products.price", + operator=FilterOperators.GREATER_THAN, + values=["100"], + ), + Filter( + member="products.price", + operator=FilterOperators.LESS_THAN, + values=["500"], + ), + ] + ) + assert len(logical_op.and_) == 2 + assert logical_op.or_ is None + + def test_nested_logical_operators(self): + """Test nesting logical operators.""" + logical_op = LogicalOperator( + or_=[ + Filter( + member="products.category", + operator=FilterOperators.EQUALS, + values=["Electronics"], + ), + LogicalOperator( + and_=[ + Filter( + member="products.price", + operator=FilterOperators.GREATER_THAN, + values=["100"], + ), + Filter( + member="products.in_stock", + operator=FilterOperators.EQUALS, + values=["true"], + ), + ] + ), + ] + ) + assert len(logical_op.or_) == 2 + assert isinstance(logical_op.or_[1], LogicalOperator) + assert len(logical_op.or_[1].and_) == 2 + + +class TestCubeJSRequest: + """Test suite for CubeJSRequest model.""" + + def test_minimal_request(self): + """Test creating a minimal request with just measures.""" + request = CubeJSRequest(measures=["orders.count", "orders.total_amount"]) + assert request.measures == ["orders.count", "orders.total_amount"] + assert request.dimensions is None + assert request.filters == [] + assert request.time_dimensions is None + + def test_complete_request(self): + """Test creating a complete request with all fields.""" + request = CubeJSRequest( + measures=["orders.count", "orders.total_amount"], + dimensions=["customers.city", "customers.state"], + time_dimensions=[ + TimeDimension( + dimension="orders.created_at", + granularity=Granularity.MONTH, + date_range=["2023-01-01", "2023-12-31"], + ) + ], + filters=[ + Filter( + 
member="orders.status", + operator=FilterOperators.EQUALS, + values=["completed"], + ), + LogicalOperator( + or_=[ + Filter( + member="orders.total_amount", + operator=FilterOperators.GREATER_THAN, + values=["100"], + ), + Filter( + member="orders.items_count", + operator=FilterOperators.GREATER_THAN, + values=["5"], + ), + ] + ), + ], + order={"orders.total_amount": OrderBy.DESC}, + limit=100, + offset=0, + ) + + assert len(request.measures) == 2 + assert len(request.dimensions) == 2 + assert len(request.time_dimensions) == 1 + assert len(request.filters) == 2 + assert request.order == {"orders.total_amount": OrderBy.DESC} + assert request.limit == 100 + assert request.offset == 0 + + def test_request_serialization(self): + """Test that the request serializes correctly with proper field names.""" + request = CubeJSRequest( + measures=["orders.count"], + time_dimensions=[ + TimeDimension( + dimension="orders.created_at", + granularity=Granularity.MONTH, + date_range=["2023-01-01", "2023-12-31"], + ) + ], + ) + + serialized = request.model_dump(by_alias=True) + assert "timeDimensions" in serialized + assert "dateRange" in serialized["timeDimensions"][0]