Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Set up pixi
uses: prefix-dev/setup-pixi@82d477f15f3a381dbcc8adc1206ce643fe110fb7 # v0.9.3
with:
environments: default lint
environments: default lint polars-minimal
- name: Install Rust
run: rustup show
- name: Cache Rust dependencies
Expand All @@ -45,6 +45,7 @@ jobs:
environment: [py310, py311, py312, py313, py314]
with_optionals: [false]
include:
# Test with optional dependencies across OSes
- os: ubuntu-latest
environment: py314-optionals
with_optionals: true
Expand All @@ -54,6 +55,10 @@ jobs:
- os: macos-latest
environment: py314-optionals
with_optionals: true
# Test polars backward compatibility on a single OS only
- os: ubuntu-latest
environment: polars-minimal
with_optionals: false
steps:
- name: Checkout branch
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
Expand Down
9 changes: 8 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
# workaround for https://github.com/prefix-dev/pixi/issues/1482
- id: pixi-install
name: pixi-install
entry: pixi install -e default -e lint
entry: pixi install -e default -e lint -e polars-minimal
language: system
always_run: true
require_serial: true
Expand Down Expand Up @@ -48,6 +48,13 @@ repos:
language: system
types: [python]
require_serial: true
# mypy with oldest supported polars version
- id: mypy
name: mypy-polars-minimal
entry: pixi run -e default-polars-minimal mypy --cache-dir .mypy_cache_polars_minimal
language: system
types: [python]
require_serial: true
# cargo-fmt
- id: cargo-fmt
name: cargo-fmt
Expand Down
19 changes: 17 additions & 2 deletions dataframely/_compat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) QuantCo 2025-2026
# SPDX-License-Identifier: BSD-3-Clause


from typing import Any

import polars as pl


class _DummyModule: # pragma: no cover
def __init__(self, module: str) -> None:
Expand Down Expand Up @@ -58,7 +59,6 @@ class Dialect: # type: ignore # noqa: N801
except ImportError:
pa = _DummyModule("pyarrow")


# -------------------------------------- PYDANTIC ------------------------------------ #

try:
Expand All @@ -71,13 +71,28 @@ class Dialect: # type: ignore # noqa: N801
except ImportError:
pydantic_core_schema = _DummyModule("pydantic_core_schema") # type: ignore

# --------------------------------------- POLARS ------------------------------------- #

_polars_version_tuple = tuple(
int(part) if part.isdigit() else part for part in pl.__version__.split(".")
)
if _polars_version_tuple < (1, 36):
from polars._typing import ( # type: ignore[attr-defined,unused-ignore]
PartitioningScheme as PartitionSchemeOrSinkDirectory,
)
else:
from polars.io.partition import ( # type: ignore[no-redef,attr-defined,unused-ignore]
_SinkDirectory as PartitionSchemeOrSinkDirectory,
)

# ------------------------------------------------------------------------------------ #

__all__ = [
"deltalake",
"DeltaTable",
"Dialect",
"MSDialect_pyodbc",
"PartitionSchemeOrSinkDirectory",
"pa",
"PGDialect_psycopg2",
"pydantic_core_schema",
Expand Down
10 changes: 7 additions & 3 deletions dataframely/filter_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
from typing import IO, TYPE_CHECKING, Any, Generic, TypeVar

import polars as pl
from polars.io.partition import _SinkDirectory as SinkDirectory

from dataframely._base_schema import BaseSchema
from dataframely._compat import deltalake

from ._compat import PartitionSchemeOrSinkDirectory
from ._storage import StorageBackend
from ._storage.delta import DeltaStorageBackend
from ._storage.parquet import ParquetStorageBackend
Expand All @@ -32,6 +32,7 @@

S = TypeVar("S", bound=BaseSchema)


# ----------------------------------- FILTER RESULT ---------------------------------- #


Expand Down Expand Up @@ -73,7 +74,8 @@ def collect_all(self, **kwargs: Any) -> FilterResult[S]:
[self.result.lazy(), self.failure._lf], **kwargs
)
return FilterResult(
result=result_df, # type: ignore
# Whether the type ignore is necessary depends on the polars version.
result=result_df, # type: ignore[arg-type,unused-ignore]
failure=FailureInfo(
failure_df.lazy(), self.failure._rule_columns, self.failure.schema
),
Expand Down Expand Up @@ -164,7 +166,9 @@ def write_parquet(self, file: str | Path | IO[bytes], **kwargs: Any) -> None:
self._write(ParquetStorageBackend(), file=file, **kwargs)

def sink_parquet(
self, file: str | Path | IO[bytes] | SinkDirectory, **kwargs: Any
self,
file: str | Path | IO[bytes] | PartitionSchemeOrSinkDirectory,
**kwargs: Any,
) -> None:
"""Stream the failure info to a single parquet file.

Expand Down
7 changes: 3 additions & 4 deletions dataframely/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
import polars as pl
import polars.exceptions as plexc
from polars._typing import FileSource
from polars.io.partition import _SinkDirectory as SinkDirectory

from dataframely._compat import deltalake

from ._base_schema import ORIGINAL_COLUMN_PREFIX, BaseSchema
from ._compat import pa, sa
from ._compat import PartitionSchemeOrSinkDirectory, pa, sa
from ._match_to_schema import match_to_schema
from ._native import format_rule_failures
from ._plugin import all_rules, all_rules_horizontal, all_rules_required
Expand Down Expand Up @@ -55,9 +54,9 @@
else:
from typing_extensions import Self


_COLUMN_VALID = "__DATAFRAMELY_VALID__"


# ------------------------------------------------------------------------------------ #
# SCHEMA DEFINITION #
# ------------------------------------------------------------------------------------ #
Expand Down Expand Up @@ -901,7 +900,7 @@ def sink_parquet(
cls,
lf: LazyFrame[Self],
/,
file: str | Path | IO[bytes] | SinkDirectory,
file: str | Path | IO[bytes] | PartitionSchemeOrSinkDirectory,
**kwargs: Any,
) -> None:
"""Stream a typed lazy frame with this schema to a parquet file.
Expand Down
9 changes: 9 additions & 0 deletions docs/guides/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,12 @@ class UserSchema(dy.Schema):
## How do I fix the ruff error `First argument of a method should be named self`?

See our documentation on [group rules](./quickstart.md#group-rules).

## What versions of `polars` does `dataframely` support?

Our CI automatically tests `dataframely` against both the minimum supported version of `polars` (currently `1.35.*`)
and the latest stable version.
We aim to extend support for new `polars` versions as they are released.

If your `polars` version falls outside the supported range, `dataframely` may still work, but you may
encounter unexpected issues.
Loading
Loading