From 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e Mon Sep 17 00:00:00 2001 From: sotech117 Date: Thu, 31 Jul 2025 17:27:24 -0400 Subject: add code for analysis of data --- .../site-packages/narwhals/_ibis/__init__.py | 0 .../site-packages/narwhals/_ibis/dataframe.py | 430 +++++++++++++ .../python3.8/site-packages/narwhals/_ibis/expr.py | 698 +++++++++++++++++++++ .../site-packages/narwhals/_ibis/expr_dt.py | 98 +++ .../site-packages/narwhals/_ibis/expr_list.py | 14 + .../site-packages/narwhals/_ibis/expr_str.py | 103 +++ .../site-packages/narwhals/_ibis/expr_struct.py | 19 + .../site-packages/narwhals/_ibis/group_by.py | 30 + .../site-packages/narwhals/_ibis/namespace.py | 227 +++++++ .../site-packages/narwhals/_ibis/selectors.py | 30 + .../site-packages/narwhals/_ibis/series.py | 41 ++ .../site-packages/narwhals/_ibis/utils.py | 227 +++++++ 12 files changed, 1917 insertions(+) create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/dataframe.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/expr.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/series.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py (limited to 'venv/lib/python3.8/site-packages/narwhals/_ibis') diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py 
class IbisLazyFrame(
    CompliantLazyFrame[
        "IbisExpr", "ir.Table", "LazyFrame[ir.Table] | DataFrameV1[ir.Table]"
    ]
):
    """Narwhals compliant lazy frame that wraps an Ibis table expression.

    Every transforming method builds a new Ibis expression from ``self.native``
    and returns it wrapped in a fresh ``IbisLazyFrame`` (see ``_with_native``);
    the wrapped table is never reassigned in this class.
    """

    _implementation = Implementation.IBIS

    def __init__(
        self, df: ir.Table, *, backend_version: tuple[int, ...], version: Version
    ) -> None:
        """Store the native table and validate the backend version.

        Arguments:
            df: Ibis table expression to wrap.
            backend_version: Parsed version tuple of the installed Ibis package.
            version: Narwhals API version this frame is exposed under.
        """
        self._native_frame: ir.Table = df
        self._version = version
        self._backend_version = backend_version
        # Lazily populated by the `schema` / `columns` properties.
        self._cached_schema: dict[str, DType] | None = None
        self._cached_columns: list[str] | None = None
        validate_backend_version(self._implementation, self._backend_version)

    @staticmethod
    def _is_native(obj: ir.Table | Any) -> TypeIs[ir.Table]:
        """Return True when `obj` is an Ibis table expression."""
        return isinstance(obj, ir.Table)

    @classmethod
    def from_native(cls, data: ir.Table, /, *, context: _FullContext) -> Self:
        """Wrap `data`, taking backend and API versions from `context`."""
        return cls(
            data, backend_version=context._backend_version, version=context._version
        )

    def to_narwhals(self) -> LazyFrame[ir.Table] | DataFrameV1[ir.Table]:
        """Return the user-facing Narwhals object for this frame.

        For `Version.MAIN` a lazy-level LazyFrame is built; for any other
        version a stable-v1 `DataFrame` at "interchange" level is returned.
        """
        if self._version is Version.MAIN:
            return self._version.lazyframe(self, level="lazy")

        from narwhals.stable.v1 import DataFrame as DataFrameV1

        return DataFrameV1(self, level="interchange")

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        """Return self as a "dataframe", but only under `Version.V1`.

        Raises:
            AttributeError: if the frame is not a V1 frame.
        """
        # Keep around for backcompat.
        if self._version is not Version.V1:
            msg = "__narwhals_dataframe__ is not implemented for IbisLazyFrame"
            raise AttributeError(msg)
        return self

    def __narwhals_lazyframe__(self) -> Self:
        """Return self; marks this object as a compliant lazy frame."""
        return self

    def __native_namespace__(self) -> ModuleType:
        """Return the `ibis` module."""
        return ibis

    def __narwhals_namespace__(self) -> IbisNamespace:
        """Return an `IbisNamespace` carrying this frame's versions."""
        # Imported locally, presumably to avoid an import cycle -- the module
        # is only imported under TYPE_CHECKING at file level.
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(backend_version=self._backend_version, version=self._version)

    def get_column(self, name: str) -> IbisInterchangeSeries:
        """Return column `name` as an interchange-level series."""
        from narwhals._ibis.series import IbisInterchangeSeries

        return IbisInterchangeSeries(self.native.select(name), version=self._version)

    def _iter_columns(self) -> Iterator[ir.Expr]:
        """Yield the native expression of each column, in `columns` order."""
        for name in self.columns:
            yield self.native[name]

    def collect(
        self, backend: ModuleType | Implementation | str | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        """Execute the expression and wrap the result in an eager frame.

        Arguments:
            backend: `None` or `Implementation.PYARROW` -> Arrow frame;
                `Implementation.PANDAS` -> pandas-like frame;
                `Implementation.POLARS` -> Polars frame.
            **kwargs: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: for any other `backend` value.
        """
        if backend is None or backend is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                self.native.to_pyarrow(),
                backend_version=parse_version(pa),
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            from narwhals._pandas_like.dataframe import PandasLikeDataFrame

            return PandasLikeDataFrame(
                self.native.to_pandas(),
                implementation=Implementation.PANDAS,
                backend_version=parse_version(pd),
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.POLARS:
            import polars as pl  # ignore-banned-import

            from narwhals._polars.dataframe import PolarsDataFrame

            return PolarsDataFrame(
                self.native.to_polars(),
                backend_version=parse_version(pl),
                version=self._version,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover

    def head(self, n: int) -> Self:
        """Return a frame limited via the native `head(n)`."""
        return self._with_native(self.native.head(n))

    def simple_select(self, *column_names: str) -> Self:
        """Select columns by name, without evaluating any expressions."""
        return self._with_native(self.native.select(*column_names))

    def aggregate(self, *exprs: IbisExpr) -> Self:
        """Evaluate `exprs` and pass the named results to the native `aggregate`."""
        selection = [
            cast("ir.Scalar", val.name(name))
            for name, val in evaluate_exprs(self, *exprs)
        ]
        return self._with_native(self.native.aggregate(selection))

    def select(self, *exprs: IbisExpr) -> Self:
        """Evaluate `exprs` and select the named results.

        Raises:
            ValueError: if the expressions evaluate to no columns at all.
        """
        selection = [val.name(name) for name, val in evaluate_exprs(self, *exprs)]
        if not selection:
            msg = "At least one expression must be provided to `select` with the Ibis backend."
            raise ValueError(msg)

        return self._with_native(self.native.select(*selection))

    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        """Drop `columns` by selecting every column not listed for removal.

        `strict` is forwarded to `parse_columns_to_drop`, which resolves the
        final list of names to remove.
        """
        columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
        selection = (col for col in self.columns if col not in columns_to_drop)
        return self._with_native(self.native.select(*selection))

    def lazy(self, *, backend: Implementation | None = None) -> Self:
        """Return self; an Ibis frame is already lazy.

        Raises:
            ValueError: if `backend` is anything other than `None`.
        """
        # The `backend` argument has no effect but we keep it here for
        # backwards compatibility because in `narwhals.stable.v1`
        # function `.from_native()` will return a DataFrame for Ibis.

        if backend is not None:  # pragma: no cover
            msg = "`backend` argument is not supported for Ibis"
            raise ValueError(msg)
        return self

    def with_columns(self, *exprs: IbisExpr) -> Self:
        """Add or overwrite columns via the native `mutate`."""
        new_columns_map = dict(evaluate_exprs(self, *exprs))
        return self._with_native(self.native.mutate(**new_columns_map))

    def filter(self, predicate: IbisExpr) -> Self:
        """Keep the rows selected by `predicate`."""
        # `[0]` is safe as the predicate's expression only returns a single column
        mask = cast("ir.BooleanValue", predicate(self)[0])
        return self._with_native(self.native.filter(mask))

    @property
    def schema(self) -> dict[str, DType]:
        """Mapping of column name to Narwhals dtype, computed once and cached."""
        if self._cached_schema is None:
            # Note: prefer `self._cached_schema` over `functools.cached_property`
            # due to Python3.13 failures.
            self._cached_schema = {
                name: native_to_narwhals_dtype(dtype, self._version)
                for name, dtype in self.native.schema().fields.items()
            }
        return self._cached_schema

    @property
    def columns(self) -> list[str]:
        """Column names, computed once and cached.

        Reuses the cached schema when it has already been computed; otherwise
        reads the names straight from the native table.
        """
        if self._cached_columns is None:
            self._cached_columns = (
                list(self.schema)
                if self._cached_schema is not None
                else list(self.native.columns)
            )
        return self._cached_columns

    def to_pandas(self) -> pd.DataFrame:
        """Materialise the table as a pandas DataFrame.

        Raises:
            NotImplementedError: if the installed pandas is older than 1.0.0.
        """
        # only if version is v1, keep around for backcompat
        import pandas as pd  # ignore-banned-import()

        if parse_version(pd) >= (1, 0, 0):
            return self.native.to_pandas()
        # pragma: no cover
        msg = f"Conversion to pandas requires pandas>=1.0.0, found {pd.__version__}"
        raise NotImplementedError(msg)

    def to_arrow(self) -> pa.Table:
        """Materialise the table as a pyarrow Table."""
        # only if version is v1, keep around for backcompat
        return self.native.to_pyarrow()

    def _with_version(self, version: Version) -> Self:
        """Return a new wrapper around the same table under another API version."""
        return self.__class__(
            self.native, version=version, backend_version=self._backend_version
        )

    def _with_native(self, df: ir.Table) -> Self:
        """Wrap `df` in a new frame that keeps this frame's versions."""
        return self.__class__(
            df, backend_version=self._backend_version, version=self._version
        )

    def group_by(
        self, keys: Sequence[str] | Sequence[IbisExpr], *, drop_null_keys: bool
    ) -> IbisGroupBy:
        """Return an `IbisGroupBy` over `keys`."""
        from narwhals._ibis.group_by import IbisGroupBy

        return IbisGroupBy(self, keys, drop_null_keys=drop_null_keys)

    def rename(self, mapping: Mapping[str, str]) -> Self:
        """Rename columns per `mapping`; names absent from it are kept as-is."""

        def _rename(col: str) -> str:
            return mapping.get(col, col)

        return self._with_native(self.native.rename(_rename))

    @staticmethod
    def _join_drop_duplicate_columns(df: ir.Table, columns: Iterable[str], /) -> ir.Table:
        """Drop from `df` whichever of `columns` are actually present.

        Ibis adds a suffix to the right table col, even when it matches the
        left during a join; callers pass those suffixed names here.
        """
        duplicates = set(df.columns).intersection(columns)
        return df.drop(*duplicates) if duplicates else df

    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join with `other`.

        Arguments:
            other: Right-hand frame.
            how: Join strategy; "full" is passed to Ibis as "outer".
            left_on: Left key columns (must not be None unless `how="cross"`).
            right_on: Right key columns (must not be None unless `how="cross"`).
            suffix: Appended to right-hand column names that Ibis renames.
        """
        how_native = "outer" if how == "full" else how
        rname = "{name}" + suffix
        if other == self:
            # Ibis does not support self-references unless created as a view
            other = self._with_native(other.native.view())
        if how_native == "cross":
            joined = self.native.join(other.native, how=how_native, rname=rname)
            return self._with_native(joined)
        # help mypy
        assert left_on is not None  # noqa: S101
        assert right_on is not None  # noqa: S101
        predicates = self._convert_predicates(other, left_on, right_on)
        joined = self.native.join(other.native, predicates, how=how_native, rname=rname)
        if how_native == "left":
            right_names = (n + suffix for n in right_on)
            joined = self._join_drop_duplicate_columns(joined, right_names)
            # String predicates (same key name on both sides) have no `.op()`;
            # only equality expressions are inspected below.
            binary_ops = (
                cast("Binary", p.op()) for p in predicates if not isinstance(p, str)
            )
            # Mirrors how polars works: drop a right key that is not a column
            # of the left frame and is named differently from its left key.
            to_drop = [
                pred.right.name
                for pred in binary_ops
                if pred.right.name not in self.columns
                and pred.left.name != pred.right.name
            ]
            if to_drop:
                joined = joined.drop(*to_drop)
        return self._with_native(joined)

    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self:
        """As-of join with `other`.

        "backward" compares with `left >= right`, "forward" with
        `left <= right`. `by_left` / `by_right` become additional join
        predicates when both are given.

        Raises:
            NotImplementedError: for any strategy other than "backward" or
                "forward".
        """
        rname = "{name}" + suffix
        strategy_op = {"backward": operator.ge, "forward": operator.le}
        predicates: JoinPredicates = []
        if op := strategy_op.get(strategy):
            on: ir.BooleanColumn = op(self.native[left_on], other.native[right_on])
        else:
            msg = "Only `backward` and `forward` strategies are currently supported for Ibis"
            raise NotImplementedError(msg)
        if by_left is not None and by_right is not None:
            predicates = self._convert_predicates(other, by_left, by_right)
        joined = self.native.asof_join(other.native, on, predicates, rname=rname)
        # Remove the suffixed copies of the right-hand key columns, if present.
        joined = self._join_drop_duplicate_columns(joined, [right_on + suffix])
        if by_right is not None:
            right_names = (n + suffix for n in by_right)
            joined = self._join_drop_duplicate_columns(joined, right_names)
        return self._with_native(joined)

    def _convert_predicates(
        self, other: Self, left_on: Sequence[str], right_on: Sequence[str]
    ) -> JoinPredicates:
        """Build join predicates from paired key names.

        When both sides use identical key names the names themselves are
        returned; otherwise one equality expression per (left, right) pair.
        """
        if left_on == right_on:
            return left_on
        return [
            cast("ir.BooleanColumn", (self.native[left] == other.native[right]))
            for left, right in zip(left_on, right_on)
        ]

    def collect_schema(self) -> dict[str, DType]:
        """Return a freshly computed name -> Narwhals dtype mapping (not cached)."""
        return {
            name: native_to_narwhals_dtype(dtype, self._version)
            for name, dtype in self.native.schema().fields.items()
        }

    def unique(
        self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
    ) -> Self:
        """Drop duplicate rows.

        With `keep="any"` and no `subset`, a plain native `distinct` is used.
        Otherwise duplicates are detected on `subset` (all columns when
        `subset` is empty/None and `keep="none"`), with "any" mapped to the
        native `keep="first"` and "none" mapped to `keep=None`.

        Raises:
            ColumnNotFoundError: if `subset` names a column that is not present.
        """
        if subset_ := subset if keep == "any" else (subset or self.columns):
            # Sanitise input
            if any(x not in self.columns for x in subset_):
                msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
                raise ColumnNotFoundError(msg)

            mapped_keep: dict[str, Literal["first"] | None] = {
                "any": "first",
                "none": None,
            }
            to_keep = mapped_keep[keep]
            return self._with_native(self.native.distinct(on=subset_, keep=to_keep))
        return self._with_native(self.native.distinct(on=subset))

    def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
        """Order rows by the `by` columns.

        Arguments:
            *by: Column names to sort on, in priority order.
            descending: One flag applied to every column, or one flag per
                column in `by`.
            nulls_last: Whether nulls sort after non-null values.

        Raises:
            ValueError: if `descending` is a sequence whose length differs
                from the number of `by` columns.
        """
        if isinstance(descending, bool):
            flags: Sequence[bool] = [descending] * len(by)
        else:
            flags = descending
            # Previously a short sequence surfaced as a bare IndexError and a
            # long one was silently ignored; fail loudly in both cases.
            if len(flags) != len(by):
                msg = (
                    f"The length of `descending` ({len(flags)}) does not match "
                    f"the length of `by` ({len(by)})."
                )
                raise ValueError(msg)

        sort_cols = [
            cast(
                "ir.Column",
                (ibis.desc if flag else ibis.asc)(name, nulls_first=not nulls_last),
            )
            for name, flag in zip(by, flags)
        ]
        return self._with_native(self.native.order_by(*sort_cols))

    def drop_nulls(self, subset: Sequence[str] | None) -> Self:
        """Drop rows with nulls in `subset` (all columns when `subset` is None)."""
        subset_ = subset if subset is not None else self.columns
        return self._with_native(self.native.drop_null(subset_))

    def explode(self, columns: Sequence[str]) -> Self:
        """Unnest a single List column (`keep_empty=True` is passed to Ibis).

        Raises:
            InvalidOperationError: if any requested column is not a List dtype.
            NotImplementedError: if more than one column is requested.
        """
        dtypes = self._version.dtypes
        schema = self.collect_schema()
        for col in columns:
            dtype = schema[col]

            if dtype != dtypes.List:
                msg = (
                    f"`explode` operation not supported for dtype `{dtype}`, "
                    "expected List type"
                )
                raise InvalidOperationError(msg)

        if len(columns) != 1:
            msg = (
                "Exploding on multiple columns is not supported with Ibis backend since "
                "we cannot guarantee that the exploded columns have matching element counts."
            )
            raise NotImplementedError(msg)

        return self._with_native(self.native.unnest(columns[0], keep_empty=True))

    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self:
        """Reshape from wide to long via the native `pivot_longer`.

        Arguments:
            on: Columns to unpivot; defaults to every column not in `index`.
            index: Identifier columns kept in the output; defaults to none.
            variable_name: Name of the output column holding former column names.
            value_name: Name of the output column holding the values.
        """
        import ibis.selectors as s

        index_: Sequence[str] = [] if index is None else index
        on_: Sequence[str] = (
            [c for c in self.columns if c not in index_] if on is None else on
        )

        # Discard columns not in the index
        # (`dict.fromkeys` de-duplicates while preserving order).
        final_columns = list(dict.fromkeys([*index_, variable_name, value_name]))

        unpivoted = self.native.pivot_longer(
            s.cols(*on_), names_to=variable_name, values_to=value_name
        )
        return self._with_native(unpivoted.select(*final_columns))

    # Operations this backend does not provide.
    gather_every = not_implemented.deprecated(
        "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
    )
    tail = not_implemented.deprecated(
        "`LazyFrame.tail` is deprecated and will be removed in a future version."
    )
    with_row_index = not_implemented()
tuple[int, ...], + version: Version, + ) -> None: + self._call = call + self._evaluate_output_names = evaluate_output_names + self._alias_output_names = alias_output_names + self._backend_version = backend_version + self._version = version + self._metadata: ExprMetadata | None = None + self._window_function: IbisWindowFunction | None = window_function + + @property + def window_function(self) -> IbisWindowFunction: + def default_window_func( + df: IbisLazyFrame, window_inputs: IbisWindowInputs + ) -> list[ir.Value]: + assert not window_inputs.order_by # noqa: S101 + return [ + expr.over(ibis.window(group_by=window_inputs.partition_by)) + for expr in self(df) + ] + + return self._window_function or default_window_func + + def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]: + return self._call(df) + + def __narwhals_expr__(self) -> None: ... + + def __narwhals_namespace__(self) -> IbisNamespace: # pragma: no cover + # Unused, just for compatibility with PandasLikeExpr + from narwhals._ibis.namespace import IbisNamespace + + return IbisNamespace(backend_version=self._backend_version, version=self._version) + + def _cum_window_func( + self, *, reverse: bool, func_name: Literal["sum", "max", "min", "count"] + ) -> IbisWindowFunction: + def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]: + window = ibis.window( + group_by=list(inputs.partition_by), + order_by=self._sort( + *inputs.order_by, descending=reverse, nulls_last=reverse + ), + preceding=None, # unbounded + following=0, + ) + + return [getattr(expr, func_name)().over(window) for expr in self(df)] + + return func + + def _rolling_window_func( + self, + *, + func_name: Literal["sum", "mean", "std", "var"], + center: bool, + window_size: int, + min_samples: int, + ddof: int | None = None, + ) -> IbisWindowFunction: + supported_funcs = ["sum", "mean", "std", "var"] + + if center: + preceding = window_size // 2 + following = window_size - preceding - 1 + else: + preceding = 
window_size - 1 + following = 0 + + def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]: + window = ibis.window( + group_by=list(inputs.partition_by), + order_by=self._sort(*inputs.order_by), + preceding=preceding, + following=following, + ) + + def inner_f(expr: ir.NumericColumn) -> ir.Value: + if func_name in {"sum", "mean"}: + func_ = getattr(expr, func_name)() + elif func_name == "var" and ddof == 0: + func_ = expr.var(how="pop") + elif func_name in "var" and ddof == 1: + func_ = expr.var(how="sample") + elif func_name == "std" and ddof == 0: + func_ = expr.std(how="pop") + elif func_name == "std" and ddof == 1: + func_ = expr.std(how="sample") + elif func_name in {"var", "std"}: # pragma: no cover + msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}." + raise ValueError(msg) + else: # pragma: no cover + msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}." + raise ValueError(msg) + + rolling_calc = func_.over(window) + valid_count = expr.count().over(window) + return ibis.cases( + (valid_count >= ibis.literal(min_samples), rolling_calc), + else_=ibis.null(), + ) + + return [inner_f(cast("ir.NumericColumn", expr)) for expr in self(df)] + + return func + + def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: + # Ibis does its own broadcasting. 
+ return self + + def _sort( + self, *cols: ir.Column | str, descending: bool = False, nulls_last: bool = False + ) -> Iterator[ir.Column]: + mapping = { + (False, False): partial(ibis.asc, nulls_first=True), + (False, True): partial(ibis.asc, nulls_first=False), + (True, False): partial(ibis.desc, nulls_first=True), + (True, True): partial(ibis.desc, nulls_first=False), + } + sort = mapping[(descending, nulls_last)] + yield from (cast("ir.Column", sort(col)) for col in cols) + + @classmethod + def from_column_names( + cls: type[Self], + evaluate_column_names: EvalNames[IbisLazyFrame], + /, + *, + context: _FullContext, + ) -> Self: + def func(df: IbisLazyFrame) -> list[ir.Column]: + return [df.native[name] for name in evaluate_column_names(df)] + + return cls( + func, + evaluate_output_names=evaluate_column_names, + alias_output_names=None, + backend_version=context._backend_version, + version=context._version, + ) + + @classmethod + def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self: + def func(df: IbisLazyFrame) -> list[ir.Column]: + return [df.native[i] for i in column_indices] + + return cls( + func, + evaluate_output_names=cls._eval_names_indices(column_indices), + alias_output_names=None, + backend_version=context._backend_version, + version=context._version, + ) + + def _with_callable( + self, call: Callable[..., ir.Value], /, **expressifiable_args: Self | Any + ) -> Self: + """Create expression from callable. + + Arguments: + call: Callable from compliant DataFrame to native Expression + expr_name: Expression name + expressifiable_args: arguments pass to expression which should be parsed + as expressions (e.g. 
in `nw.col('a').is_between('b', 'c')`) + """ + + def func(df: IbisLazyFrame) -> list[ir.Value]: + native_series_list = self(df) + other_native_series = { + key: df._evaluate_expr(value) if self._is_expr(value) else value + for key, value in expressifiable_args.items() + } + return [ + call(native_series, **other_native_series) + for native_series in native_series_list + ] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: + return type(self)( + self._call, + self._window_function, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=func, + backend_version=self._backend_version, + version=self._version, + ) + + def _with_window_function(self, window_function: IbisWindowFunction) -> Self: + return self.__class__( + self._call, + window_function, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + @classmethod + def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT: + return cast("ExprT", expr.name(name)) + + def __and__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr & other, other=other) + + def __or__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr | other, other=other) + + def __add__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr + other, other=other) + + def __truediv__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr / other, other=other) + + def __rtruediv__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__rtruediv__(other), other=other + ).alias("literal") + + def __floordiv__(self, other: 
IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__floordiv__(other), other=other + ) + + def __rfloordiv__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__rfloordiv__(other), other=other + ).alias("literal") + + def __mod__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr.__mod__(other), other=other) + + def __rmod__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__rmod__(other), other=other + ).alias("literal") + + def __sub__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr - other, other=other) + + def __rsub__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__rsub__(other), other=other + ).alias("literal") + + def __mul__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr * other, other=other) + + def __pow__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr**other, other=other) + + def __rpow__(self, other: IbisExpr) -> Self: + return self._with_callable( + lambda expr, other: expr.__rpow__(other), other=other + ).alias("literal") + + def __lt__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr < other, other=other) + + def __gt__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr > other, other=other) + + def __le__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr <= other, other=other) + + def __ge__(self, other: IbisExpr) -> Self: + return self._with_callable(lambda expr, other: expr >= other, other=other) + + def __eq__(self, other: IbisExpr) -> Self: # type: ignore[override] + return self._with_callable(lambda expr, other: expr == other, other=other) + + def __ne__(self, other: IbisExpr) -> Self: # type: ignore[override] + return self._with_callable(lambda 
expr, other: expr != other, other=other) + + def __invert__(self) -> Self: + invert = cast("Callable[..., ir.Value]", operator.invert) + return self._with_callable(invert) + + def abs(self) -> Self: + return self._with_callable(lambda expr: expr.abs()) + + def mean(self) -> Self: + return self._with_callable(lambda expr: expr.mean()) + + def median(self) -> Self: + return self._with_callable(lambda expr: expr.median()) + + def all(self) -> Self: + return self._with_callable(lambda expr: expr.all().fill_null(lit(True))) # noqa: FBT003 + + def any(self) -> Self: + return self._with_callable(lambda expr: expr.any().fill_null(lit(False))) # noqa: FBT003 + + def quantile( + self, quantile: float, interpolation: RollingInterpolationMethod + ) -> Self: + if interpolation != "linear": + msg = "Only linear interpolation methods are supported for Ibis quantile." + raise NotImplementedError(msg) + return self._with_callable(lambda expr: expr.quantile(quantile)) + + def clip(self, lower_bound: Any, upper_bound: Any) -> Self: + def _clip(expr: ir.NumericValue, lower: Any, upper: Any) -> ir.NumericValue: + return expr.clip(lower=lower, upper=upper) + + return self._with_callable(_clip, lower=lower_bound, upper=upper_bound) + + def sum(self) -> Self: + return self._with_callable(lambda expr: expr.sum().fill_null(lit(0))) + + def n_unique(self) -> Self: + return self._with_callable( + lambda expr: expr.nunique() + expr.isnull().any().cast("int8") + ) + + def count(self) -> Self: + return self._with_callable(lambda expr: expr.count()) + + def len(self) -> Self: + def func(df: IbisLazyFrame) -> list[ir.IntegerScalar]: + return [df.native.count()] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + version=self._version, + ) + + def std(self, ddof: int) -> Self: + def _std(expr: ir.NumericColumn, ddof: int) -> ir.Value: + if ddof == 0: + return 
expr.std(how="pop")
+            elif ddof == 1:
+                return expr.std(how="sample")
+            else:
+                n_samples = expr.count()
+                std_pop = expr.std(how="pop")
+                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
+                return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt()
+
+        return self._with_callable(lambda expr: _std(expr, ddof))
+
+    def var(self, ddof: int) -> Self:
+        """Variance of the column with `ddof` delta degrees of freedom."""
+
+        def _var(expr: ir.NumericColumn, ddof: int) -> ir.Value:
+            # ddof 0 and 1 map directly onto ibis' "pop" / "sample" variance.
+            if ddof == 0:
+                return expr.var(how="pop")
+            elif ddof == 1:
+                return expr.var(how="sample")
+            else:
+                # Any other ddof: rescale the population variance by n / (n - ddof).
+                n_samples = expr.count()
+                var_pop = expr.var(how="pop")
+                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
+                return var_pop * n_samples / (n_samples - ddof_lit)
+
+        return self._with_callable(lambda expr: _var(expr, ddof))
+
+    def max(self) -> Self:
+        """Maximum of the column (ibis `max`)."""
+        return self._with_callable(lambda expr: expr.max())
+
+    def min(self) -> Self:
+        """Minimum of the column (ibis `min`)."""
+        return self._with_callable(lambda expr: expr.min())
+
+    def null_count(self) -> Self:
+        """Number of nulls, computed as the sum of the `isnull()` mask."""
+        return self._with_callable(lambda expr: expr.isnull().sum())
+
+    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
+        """Evaluate this expression's window function over the given partition/order columns."""
+
+        def func(df: IbisLazyFrame) -> Sequence[ir.Value]:
+            return self.window_function(df, WindowInputs(partition_by, order_by))
+
+        # Build a new expression of the same class; output naming is carried over unchanged.
+        return self.__class__(
+            func,
+            evaluate_output_names=self._evaluate_output_names,
+            alias_output_names=self._alias_output_names,
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def is_null(self) -> Self:
+        """Boolean mask that is true where the value is null."""
+        return self._with_callable(lambda expr: expr.isnull())
+
+    def is_nan(self) -> Self:
+        """NaN mask: null stays null, and non-floating inputs yield False."""
+
+        def func(expr: ir.FloatingValue | Any) -> ir.Value:
+            # `isnan` is only called for floating types; every other type gets a constant False.
+            otherwise = expr.isnan() if is_floating(expr.type()) else False
+            # Null inputs map to null (None) rather than to a boolean.
+            return ibis.ifelse(expr.isnull(), None, otherwise)
+
+        return self._with_callable(func)
+
+    def is_finite(self) -> Self:
+        """Boolean mask that is true where the value is neither infinite nor NaN."""
+        return self._with_callable(lambda expr: ~(expr.isinf() | expr.isnan()))
+
+    def is_in(self, other: Sequence[Any]) -> Self:
+        """Membership test against `other` (ibis `isin`)."""
+        return self._with_callable(lambda expr: expr.isin(other))
+
+    def round(self, decimals: int) -> Self:
+        """Round to `decimals` digits (ibis `round`)."""
+        return self._with_callable(lambda expr: expr.round(decimals))
+
+    def shift(self, n: int) -> Self:
+        """Shift values by `n` rows using `lag(n)` over the supplied window inputs."""
+
+        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
+            return [
+                expr.lag(n).over(  # type: ignore[attr-defined, unused-ignore]
+                    ibis.window(
+                        group_by=inputs.partition_by,
+                        order_by=self._sort(*inputs.order_by),
+                    )
+                )
+                for expr in self(df)
+            ]
+
+        return self._with_window_function(_func)
+
+    def is_first_distinct(self) -> Self:
+        """True for the first row of each distinct value, in window order."""
+
+        def func(
+            df: IbisLazyFrame, inputs: IbisWindowInputs
+        ) -> Sequence[ir.BooleanValue]:
+            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
+            # The value itself is added to the partition keys, so rows are numbered per distinct value.
+            return [
+                ibis.row_number().over(
+                    ibis.window(
+                        group_by=[*inputs.partition_by, expr],
+                        order_by=self._sort(*inputs.order_by),
+                    )
+                )
+                == lit(0)
+                for expr in self(df)
+            ]
+
+        return self._with_window_function(func)
+
+    def is_last_distinct(self) -> Self:
+        """True for the last row of each distinct value, in window order."""
+
+        def func(
+            df: IbisLazyFrame, inputs: IbisWindowInputs
+        ) -> Sequence[ir.BooleanValue]:
+            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
+            # Same as `is_first_distinct`, but the ordering is reversed (descending), so row 0 is the last one.
+            return [
+                ibis.row_number().over(
+                    ibis.window(
+                        group_by=[*inputs.partition_by, expr],
+                        order_by=self._sort(
+                            *inputs.order_by, descending=True, nulls_last=True
+                        ),
+                    )
+                )
+                == lit(0)
+                for expr in self(df)
+            ]
+
+        return self._with_window_function(func)
+
+    def diff(self) -> Self:
+        """Difference between each value and its predecessor (`lag()`) in window order."""
+
+        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
+            return [
+                expr
+                - expr.lag().over(  # type: ignore[attr-defined, unused-ignore]
+                    ibis.window(
+                        following=0,
+                        group_by=inputs.partition_by,
+                        order_by=self._sort(*inputs.order_by),
+                    )
+                )
+                for expr in self(df)
+            ]
+
+        return self._with_window_function(_func)
+
+    # The cumulative and rolling methods below only select a `func_name`; the window
+    # construction is delegated to `_cum_window_func` / `_rolling_window_func`.
+    def cum_sum(self, *, reverse: bool) -> Self:
+        return self._with_window_function(
+            self._cum_window_func(reverse=reverse, func_name="sum")
+        )
+
+    def cum_max(self, *, reverse: bool) -> Self:
+        return self._with_window_function(
+            self._cum_window_func(reverse=reverse, func_name="max")
+        )
+
+    def cum_min(self, *, reverse: bool) -> Self:
+        return self._with_window_function(
+            self._cum_window_func(reverse=reverse, func_name="min")
+        )
+
+    def cum_count(self, *, reverse: bool) -> Self:
+        return self._with_window_function(
+            self._cum_window_func(reverse=reverse, func_name="count")
+        )
+
+    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="sum",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+            )
+        )
+
+    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="mean",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+            )
+        )
+
+    def rolling_var(
+        self, window_size: int, *, min_samples: int, center: bool, ddof: int
+    ) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="var",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+                ddof=ddof,
+            )
+        )
+
+    def rolling_std(
+        self, window_size: int, *, min_samples: int, center: bool, ddof: int
+    ) -> Self:
+        return self._with_window_function(
+            self._rolling_window_func(
+                func_name="std",
+                center=center,
+                window_size=window_size,
+                min_samples=min_samples,
+                ddof=ddof,
+            )
+        )
+
+    def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self:
+        """Replace nulls with `value`.
+
+        Raises:
+            NotImplementedError: if `strategy` or `limit` is not None.
+        """
+        # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward
+        # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539
+        if strategy is not None:
+            msg = "`strategy` is not supported for the Ibis backend"
+            raise NotImplementedError(msg)
+        if limit is not None:
+            msg = "`limit` is not supported for the Ibis backend"  # pragma: no cover
+            raise NotImplementedError(msg)
+
+        def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value:
+            return expr.fill_null(value)
+
+        return self._with_callable(_fill_null, value=value)
+
+    def cast(self, dtype: IntoDType) -> Self:
+        """Cast to the ibis dtype that corresponds to the Narwhals `dtype`."""
+
+        def _func(expr: ir.Column) -> ir.Value:
+            native_dtype = narwhals_to_native_dtype(dtype, self._version)
+            # ibis `cast` overloads do not include DataType, only literals
+            return expr.cast(native_dtype)  # type: ignore[unused-ignore]
+
+        return self._with_callable(_func)
+
+    def is_unique(self) -> Self:
+        """True where the value occurs exactly once (window count grouped by the value equals 1)."""
+        # NOTE(review): the count is taken over the `isnull()` mask rather than the value itself,
+        # presumably so that null rows are counted too - confirm.
+        return self._with_callable(
+            lambda expr: expr.isnull().count().over(ibis.window(group_by=(expr))) == 1
+        )
+
+    def rank(self, method: RankMethod, *, descending: bool) -> Self:
+        """Rank values with the given tie-breaking `method`; rows where the value is null get no rank."""
+
+        def _rank(expr: ir.Column) -> ir.Column:
+            order_by = next(self._sort(expr, descending=descending, nulls_last=True))
+            window = ibis.window(order_by=order_by)
+
+            # "dense" and "ordinal" have dedicated constructions; every other method starts from `rank()`.
+            if method == "dense":
+                rank_ = order_by.dense_rank()
+            elif method == "ordinal":
+                rank_ = cast("ir.IntegerColumn", ibis.row_number().over(window))
+            else:
+                rank_ = order_by.rank()
+
+            # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank.
+            rank_ = rank_ + cast("ir.IntegerValue", lit(1))
+
+            # For "max" and "average", adjust using the count of rows in the partition.
+            if method == "max":
+                # Define a window partitioned by expr (i.e. each distinct value)
+                partition = ibis.window(group_by=[expr])
+                cnt = cast("ir.IntegerValue", expr.count().over(partition))
+                rank_ = rank_ + cnt - cast("ir.IntegerValue", lit(1))
+            elif method == "average":
+                partition = ibis.window(group_by=[expr])
+                cnt = cast("ir.IntegerValue", expr.count().over(partition))
+                # Half of (ties - 1) is added to the base rank.
+                avg = cast(
+                    "ir.NumericValue", (cnt - cast("ir.IntegerScalar", lit(1))) / lit(2.0)
+                )
+                rank_ = rank_ + avg
+
+            # `cases` without an else branch: only rows where `expr` is not null receive `rank_`.
+            return cast("ir.Column", ibis.cases((expr.notnull(), rank_)))
+
+        return self._with_callable(_rank)
+
+    def log(self, base: float) -> Self:
+        """Logarithm in `base`; negative inputs give NaN and zero gives -inf."""
+
+        def _log(expr: ir.NumericColumn) -> ir.Value:
+            otherwise = expr.log(cast("ir.NumericValue", lit(base)))
+            # The two special cases are spelled out explicitly instead of relying on the backend's `log`.
+            return ibis.cases(
+                (expr < lit(0), lit(float("nan"))),
+                (expr == lit(0), lit(float("-inf"))),
+                else_=otherwise,
+            )
+
+        return self._with_callable(_log)
+
+    def exp(self) -> Self:
+        """Exponential of the column (ibis `exp`)."""
+
+        def _exp(expr: ir.NumericColumn) -> ir.Value:
+            return expr.exp()
+
+        return self._with_callable(_exp)
+
+    # Accessor namespaces; each one wraps this expression.
+    @property
+    def str(self) -> IbisExprStringNamespace:
+        return IbisExprStringNamespace(self)
+
+    @property
+    def dt(self) -> IbisExprDateTimeNamespace:
+        return IbisExprDateTimeNamespace(self)
+
+    @property
+    def list(self) -> IbisExprListNamespace:
+        return IbisExprListNamespace(self)
+
+    @property
+    def struct(self) -> IbisExprStructNamespace:
+        return IbisExprStructNamespace(self)
+
+    # NOTE: https://github.com/ibis-project/ibis/issues/10542
+    cum_prod = not_implemented()
+    drop_nulls = not_implemented()
+
+    # NOTE: https://github.com/ibis-project/ibis/issues/11176
+    skew = not_implemented()
+    unique = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py
new file mode 100644
index 0000000..14d9d06
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable
+
+from narwhals._duration import
parse_interval_string +from narwhals._ibis.utils import UNITS_DICT_BUCKET, UNITS_DICT_TRUNCATE +from narwhals._utils import not_implemented + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + from narwhals._ibis.utils import BucketUnit, TruncateUnit + + +class IbisExprDateTimeNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def year(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.year()) + + def month(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.month()) + + def day(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.day()) + + def hour(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.hour()) + + def minute(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.minute()) + + def second(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.second()) + + def millisecond(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.millisecond()) + + def microsecond(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.microsecond()) + + def to_string(self, format: str) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.strftime(format)) + + def weekday(self) -> IbisExpr: + # Ibis uses 0-6 for Monday-Sunday. Add 1 to match polars. 
+ return self._compliant_expr._with_callable( + lambda expr: expr.day_of_week.index() + 1 + ) + + def ordinal_day(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.day_of_year()) + + def date(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.date()) + + def _bucket(self, kwds: dict[BucketUnit, Any], /) -> Callable[..., ir.TimestampValue]: + def fn(expr: ir.TimestampValue) -> ir.TimestampValue: + return expr.bucket(**kwds) + + return fn + + def _truncate(self, unit: TruncateUnit, /) -> Callable[..., ir.TimestampValue]: + def fn(expr: ir.TimestampValue) -> ir.TimestampValue: + return expr.truncate(unit) + + return fn + + def truncate(self, every: str) -> IbisExpr: + multiple, unit = parse_interval_string(every) + if unit == "q": + multiple, unit = 3 * multiple, "mo" + if multiple != 1: + if self._compliant_expr._backend_version < (7, 1): # pragma: no cover + msg = "Truncating datetimes with multiples of the unit is only supported in Ibis >= 7.1." 
+ raise NotImplementedError(msg) + fn = self._bucket({UNITS_DICT_BUCKET[unit]: multiple}) + else: + fn = self._truncate(UNITS_DICT_TRUNCATE[unit]) + return self._compliant_expr._with_callable(fn) + + def replace_time_zone(self, time_zone: str | None) -> IbisExpr: + if time_zone is None: + return self._compliant_expr._with_callable( + lambda _input: _input.cast("timestamp") + ) + else: # pragma: no cover + msg = "`replace_time_zone` with non-null `time_zone` not yet implemented for Ibis" + raise NotImplementedError(msg) + + nanosecond = not_implemented() + total_minutes = not_implemented() + total_seconds = not_implemented() + total_milliseconds = not_implemented() + total_microseconds = not_implemented() + total_nanoseconds = not_implemented() diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py new file mode 100644 index 0000000..b29fc83 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from narwhals._ibis.expr import IbisExpr + + +class IbisExprListNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def len(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.length()) diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py new file mode 100644 index 0000000..1c0d6e5 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Callable + +from ibis.expr.datatypes import Timestamp + +from narwhals._utils import _is_naive_format, not_implemented + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + + +class IbisExprStringNamespace: + def 
__init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def starts_with(self, prefix: str) -> IbisExpr: + def fn(expr: ir.StringColumn) -> ir.BooleanValue: + return expr.startswith(prefix) + + return self._compliant_expr._with_callable(fn) + + def ends_with(self, suffix: str) -> IbisExpr: + def fn(expr: ir.StringColumn) -> ir.BooleanValue: + return expr.endswith(suffix) + + return self._compliant_expr._with_callable(fn) + + def contains(self, pattern: str, *, literal: bool) -> IbisExpr: + def fn(expr: ir.StringColumn) -> ir.BooleanValue: + return expr.contains(pattern) if literal else expr.re_search(pattern) + + return self._compliant_expr._with_callable(fn) + + def slice(self, offset: int, length: int) -> IbisExpr: + def fn(expr: ir.StringColumn) -> ir.StringValue: + return expr.substr(start=offset, length=length) + + return self._compliant_expr._with_callable(fn) + + def split(self, by: str) -> IbisExpr: + def fn(expr: ir.StringColumn) -> ir.ArrayValue: + return expr.split(by) + + return self._compliant_expr._with_callable(fn) + + def len_chars(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.length()) + + def to_lowercase(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.lower()) + + def to_uppercase(self) -> IbisExpr: + return self._compliant_expr._with_callable(lambda expr: expr.upper()) + + def strip_chars(self, characters: str | None) -> IbisExpr: + if characters is not None: + msg = "Ibis does not support `characters` argument in `str.strip_chars`" + raise NotImplementedError(msg) + + return self._compliant_expr._with_callable(lambda expr: expr.strip()) + + def _replace_all(self, pattern: str, value: str) -> Callable[..., ir.StringValue]: + def fn(expr: ir.StringColumn) -> ir.StringValue: + return expr.re_replace(pattern, value) + + return fn + + def _replace_all_literal( + self, pattern: str, value: str + ) -> Callable[..., ir.StringValue]: + def fn(expr: ir.StringColumn) -> 
ir.StringValue: + return expr.replace(pattern, value) # pyright: ignore[reportArgumentType] + + return fn + + def replace_all(self, pattern: str, value: str, *, literal: bool) -> IbisExpr: + fn = self._replace_all_literal if literal else self._replace_all + return self._compliant_expr._with_callable(fn(pattern, value)) + + def _to_datetime(self, format: str) -> Callable[..., ir.TimestampValue]: + def fn(expr: ir.StringColumn) -> ir.TimestampValue: + return expr.as_timestamp(format) + + return fn + + def _to_datetime_naive(self, format: str) -> Callable[..., ir.TimestampValue]: + def fn(expr: ir.StringColumn) -> ir.TimestampValue: + dtype: Any = Timestamp(timezone=None) + return expr.as_timestamp(format).cast(dtype) + + return fn + + def to_datetime(self, format: str | None) -> IbisExpr: + if format is None: + msg = "Cannot infer format with Ibis backend" + raise NotImplementedError(msg) + fn = self._to_datetime_naive if _is_naive_format(format) else self._to_datetime + return self._compliant_expr._with_callable(fn(format)) + + replace = not_implemented() diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py new file mode 100644 index 0000000..f268281 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import ibis.expr.types as ir + + from narwhals._ibis.expr import IbisExpr + + +class IbisExprStructNamespace: + def __init__(self, expr: IbisExpr) -> None: + self._compliant_expr = expr + + def field(self, name: str) -> IbisExpr: + def func(expr: ir.StructColumn) -> ir.Column: + return expr[name] + + return self._compliant_expr._with_callable(func).alias(name) diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py new file mode 100644 index 0000000..54fa037 --- 
/dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import LazyGroupBy
+
+if TYPE_CHECKING:
+    import ibis.expr.types as ir  # noqa: F401
+
+    from narwhals._ibis.dataframe import IbisLazyFrame
+    from narwhals._ibis.expr import IbisExpr
+
+
+class IbisGroupBy(LazyGroupBy["IbisLazyFrame", "IbisExpr", "ir.Value"]):
+    """Group-by for Ibis lazy frames: parses the keys once, then aggregates natively."""
+
+    def __init__(
+        self,
+        df: IbisLazyFrame,
+        keys: Sequence[str] | Sequence[IbisExpr],
+        /,
+        *,
+        drop_null_keys: bool,
+    ) -> None:
+        parsed_frame, self._keys, self._output_key_names = self._parse_keys(
+            df, keys=keys
+        )
+        # Rows with null keys are dropped up front when requested.
+        if drop_null_keys:
+            parsed_frame = parsed_frame.drop_nulls(self._keys)
+        self._compliant_frame = parsed_frame
+
+    def agg(self, *exprs: IbisExpr) -> IbisLazyFrame:
+        """Aggregate `exprs` per group and rename the key columns to their output names."""
+        grouped = self.compliant.native.group_by(self._keys)
+        aggregated = self.compliant._with_native(
+            grouped.aggregate(*self._evaluate_exprs(exprs))
+        )
+        key_renames = dict(zip(self._keys, self._output_key_names))
+        return aggregated.rename(key_renames)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py
new file mode 100644
index 0000000..25393cf
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from itertools import chain
+from typing import TYPE_CHECKING, Any, Iterable, Sequence, cast
+
+import ibis
+import ibis.expr.types as ir
+
+from narwhals._compliant import LazyNamespace, LazyThen, LazyWhen
+from narwhals._expression_parsing import (
+    combine_alias_output_names,
+    combine_evaluate_output_names,
+)
+from narwhals._ibis.dataframe import IbisLazyFrame
+from narwhals._ibis.expr import IbisExpr
+from narwhals._ibis.selectors import IbisSelectorNamespace
+from narwhals._ibis.utils import lit, narwhals_to_native_dtype
+from narwhals._utils import Implementation, requires
+
+if TYPE_CHECKING:
+    from
narwhals._utils import Version
+    from narwhals.typing import ConcatMethod, IntoDType
+
+
+class IbisNamespace(LazyNamespace[IbisLazyFrame, IbisExpr, "ir.Table"]):
+    """Namespace of frame-level and horizontal operations for the Ibis backend."""
+
+    _implementation: Implementation = Implementation.IBIS
+
+    def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
+        self._backend_version = backend_version
+        self._version = version
+
+    @property
+    def selectors(self) -> IbisSelectorNamespace:
+        return IbisSelectorNamespace.from_namespace(self)
+
+    @property
+    def _expr(self) -> type[IbisExpr]:
+        return IbisExpr
+
+    @property
+    def _lazyframe(self) -> type[IbisLazyFrame]:
+        return IbisLazyFrame
+
+    def concat(
+        self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod
+    ) -> IbisLazyFrame:
+        """Concatenate frames with `ibis.union`.
+
+        Raises:
+            NotImplementedError: if `how` is "diagonal".
+            TypeError: if the frames do not all share the first frame's schema.
+        """
+        if how == "diagonal":
+            msg = "diagonal concat not supported for Ibis. Please join instead."
+            raise NotImplementedError(msg)
+
+        # Materialise once: `items` is iterated twice below and may be a one-shot iterable.
+        items = list(items)
+        native_items = [item.native for item in items]
+        schema = items[0].schema
+        if not all(x.schema == schema for x in items[1:]):
+            msg = "inputs should all have the same schema"
+            raise TypeError(msg)
+        return self._lazyframe.from_native(ibis.union(*native_items), context=self)
+
+    def concat_str(
+        self, *exprs: IbisExpr, separator: str, ignore_nulls: bool
+    ) -> IbisExpr:
+        """Concatenate the string-cast columns of `exprs`, separated by `separator`."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = list(chain.from_iterable(expr(df) for expr in exprs))
+            cols_casted = [s.cast("string") for s in cols]
+
+            # Two constructions: chained `+` when nulls are not ignored, `separator.join` otherwise.
+            if not ignore_nulls:
+                result = cols_casted[0]
+                for col in cols_casted[1:]:
+                    result = result + separator + col
+            else:
+                sep = cast("ir.StringValue", lit(separator))
+                result = sep.join(cols_casted)
+
+            return [result]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def all_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise AND across every column produced by `exprs`."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = chain.from_iterable(expr(df) for expr in exprs)
+            return [reduce(operator.and_, cols)]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def any_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise OR across every column produced by `exprs`."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = chain.from_iterable(expr(df) for expr in exprs)
+            return [reduce(operator.or_, cols)]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def max_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise maximum via `ibis.greatest`."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = chain.from_iterable(expr(df) for expr in exprs)
+            return [ibis.greatest(*cols)]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def min_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise minimum via `ibis.least`."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = chain.from_iterable(expr(df) for expr in exprs)
+            return [ibis.least(*cols)]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def sum_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise sum; nulls are replaced by 0 before adding."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            cols = [e.fill_null(lit(0)) for _expr in exprs for e in _expr(df)]
+            return [reduce(operator.add, cols)]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+        """Row-wise mean: sum of null-filled values divided by the number of non-null values."""
+
+        def func(df: IbisLazyFrame) -> list[ir.Value]:
+            # Numerator terms: each value, with nulls replaced by 0.
+            expr = (
+                cast("ir.NumericColumn", e.fill_null(lit(0)))
+                for _expr in exprs
+                for e in _expr(df)
+            )
+            # Denominator terms: 1 for a non-null value, 0 for a null.
+            non_null = (
+                cast("ir.NumericColumn", e.isnull().ifelse(lit(0), lit(1)))
+                for _expr in exprs
+                for e in _expr(df)
+            )
+
+            return [
+                (reduce(lambda x, y: x + y, expr) / reduce(lambda x, y: x + y, non_null))
+            ]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    @requires.backend_version((10, 0))
+    def when(self, predicate: IbisExpr) -> IbisWhen:
+        """Start a when/then/otherwise chain on `predicate`."""
+        return IbisWhen.from_expr(predicate, context=self)
+
+    def lit(self, value: Any, dtype: IntoDType | None) -> IbisExpr:
+        """Literal expression named "literal", optionally typed with `dtype`."""
+
+        def func(_df: IbisLazyFrame) -> list[ir.Value]:
+            ibis_dtype = narwhals_to_native_dtype(dtype, self._version) if dtype else None
+            return [lit(value, ibis_dtype)]
+
+        return self._expr(
+            func,
+            evaluate_output_names=lambda _df: ["literal"],
+            alias_output_names=None,
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+    def len(self) -> IbisExpr:
+        """Row count of the frame (`native.count()`), output named "len"."""
+
+        def func(_df: IbisLazyFrame) -> list[ir.Value]:
+            return [_df.native.count()]
+
+        return self._expr(
+            call=func,
+            evaluate_output_names=lambda _df: ["len"],
+            alias_output_names=None,
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+
+
+class IbisWhen(LazyWhen["IbisLazyFrame", "ir.Value", IbisExpr]):
+    """when/then/otherwise evaluation built on `ibis.cases`."""
+
+    lit = lit
+
+    @property
+    def _then(self) -> type[IbisThen]:
+        return IbisThen
+
+    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
+        is_expr = self._condition._is_expr
+        condition = df._evaluate_expr(self._condition)
+        # `then` / `otherwise` may be expressions or plain values; plain values become literals.
+        then_ = self._then_value
+        then = df._evaluate_expr(then_) if is_expr(then_) else lit(then_)
+        other_ = self._otherwise_value
+        if other_ is None:
+            # No otherwise-value: `cases` is called without an `else_` branch.
+            result = ibis.cases((condition, then))
+        else:
+            otherwise = df._evaluate_expr(other_) if is_expr(other_) else lit(other_)
+            result = ibis.cases((condition, then), else_=otherwise)
+        return [result]
+
+
+class IbisThen(LazyThen["IbisLazyFrame", "ir.Value", IbisExpr], IbisExpr): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py
new file mode 100644
index 0000000..f96243b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, LazySelectorNamespace
+from narwhals._ibis.expr import IbisExpr
+
+if TYPE_CHECKING:
+    import ibis.expr.types as ir  # noqa: F401
+
+    from narwhals._ibis.dataframe import IbisLazyFrame  # noqa: F401
+
+
+class IbisSelectorNamespace(LazySelectorNamespace["IbisLazyFrame", "ir.Value"]):
+    """Selector namespace whose selector class is `IbisSelector`."""
+
+    @property
+    def _selector(self) -> type[IbisSelector]:
+        return IbisSelector
+
+
+class IbisSelector(  # type: ignore[misc]
+    CompliantSelector["IbisLazyFrame", "ir.Value"], IbisExpr
+):
+    def _to_expr(self) -> IbisExpr:
+        """Return a plain `IbisExpr` with the same call, output names and versions."""
+        return IbisExpr(
+            self._call,
+            evaluate_output_names=self._evaluate_output_names,
+            alias_output_names=self._alias_output_names,
+            backend_version=self._backend_version,
+            version=self._version,
+        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py
new file mode 100644
index 0000000..3c55d3c
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, NoReturn
+
+from narwhals._ibis.utils import native_to_narwhals_dtype
+from narwhals.dependencies import get_ibis
+
+if TYPE_CHECKING:
+    from types import ModuleType
+
+    from typing_extensions import Self
+
+    from narwhals._utils
import Version
+    from narwhals.dtypes import DType
+
+
+class IbisInterchangeSeries:
+    """Interchange-level series wrapper: only `dtype` is supported."""
+
+    def __init__(self, df: Any, version: Version) -> None:
+        self._native_series = df
+        self._version = version
+
+    def __narwhals_series__(self) -> Self:
+        return self
+
+    def __native_namespace__(self) -> ModuleType:
+        return get_ibis()
+
+    @property
+    def dtype(self) -> DType:
+        # The dtype is read from the first (index 0) type of the wrapped object's schema.
+        return native_to_narwhals_dtype(
+            self._native_series.schema().types[0], self._version
+        )
+
+    def __getattr__(self, attr: str) -> NoReturn:
+        # Any attribute not found through normal lookup is reported as unsupported.
+        msg = (
+            f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
+            "If you would like to see this kind of object better supported in "
+            "Narwhals, please open a feature request "
+            "at https://github.com/narwhals-dev/narwhals/issues."
+        )
+        raise NotImplementedError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py
new file mode 100644
index 0000000..477781f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import TYPE_CHECKING, Any, Literal, Mapping
+
+import ibis
+import ibis.expr.datatypes as ibis_dtypes
+
+from narwhals._utils import isinstance_or_issubclass
+
+if TYPE_CHECKING:
+    import ibis.expr.types as ir
+    from ibis.expr.datatypes import DataType as IbisDataType
+    from typing_extensions import TypeAlias, TypeIs
+
+    from narwhals._duration import IntervalUnit
+    from narwhals._ibis.dataframe import IbisLazyFrame
+    from narwhals._ibis.expr import IbisExpr
+    from narwhals._utils import Version
+    from narwhals.dtypes import DType
+    from narwhals.typing import IntoDType
+
+lit = ibis.literal
+"""Alias for `ibis.literal`."""
+
+BucketUnit: TypeAlias = Literal[
+    "years",
+    "quarters",
+    "months",
+    "days",
+    "hours",
+    "minutes",
+    "seconds",
+    "milliseconds",
+    "microseconds",
+    "nanoseconds",
+]
+TruncateUnit: TypeAlias = Literal[
+    "Y", "Q", "M", "W", "D", "h", "m", "s", "ms", "us", "ns"
+]
+
+# Narwhals interval unit -> keyword name, used as `expr.bucket(**{name: n})` in expr_dt.py.
+UNITS_DICT_BUCKET: Mapping[IntervalUnit, BucketUnit] = {
+    "y": "years",
+    "q": "quarters",
+    "mo": "months",
+    "d": "days",
+    "h": "hours",
+    "m": "minutes",
+    "s": "seconds",
+    "ms": "milliseconds",
+    "us": "microseconds",
+    "ns": "nanoseconds",
+}
+
+# Narwhals interval unit -> unit code, used as `expr.truncate(code)` in expr_dt.py.
+UNITS_DICT_TRUNCATE: Mapping[IntervalUnit, TruncateUnit] = {
+    "y": "Y",
+    "q": "Q",
+    "mo": "M",
+    "d": "D",
+    "h": "h",
+    "m": "m",
+    "s": "s",
+    "ms": "ms",
+    "us": "us",
+    "ns": "ns",
+}
+
+
+def evaluate_exprs(df: IbisLazyFrame, /, *exprs: IbisExpr) -> list[tuple[str, ir.Value]]:
+    """Evaluate `exprs` against `df` and pair each result with its output name.
+
+    Raises:
+        AssertionError: if an expression yields a different number of names than results.
+    """
+    native_results: list[tuple[str, ir.Value]] = []
+    for expr in exprs:
+        native_series_list = expr(df)
+        output_names = expr._evaluate_output_names(df)
+        # Aliases, when present, are applied on top of the evaluated names.
+        if expr._alias_output_names is not None:
+            output_names = expr._alias_output_names(output_names)
+        if len(output_names) != len(native_series_list):  # pragma: no cover
+            msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
+            raise AssertionError(msg)
+        native_results.extend(zip(output_names, native_series_list))
+    return native_results
+
+
+@lru_cache(maxsize=16)
+def native_to_narwhals_dtype(ibis_dtype: IbisDataType, version: Version) -> DType:  # noqa: C901, PLR0912
+    """Map an ibis dtype to the Narwhals dtype of `version`; unmatched types give `Unknown`."""
+    dtypes = version.dtypes
+    if ibis_dtype.is_int64():
+        return dtypes.Int64()
+    if ibis_dtype.is_int32():
+        return dtypes.Int32()
+    if ibis_dtype.is_int16():
+        return dtypes.Int16()
+    if ibis_dtype.is_int8():
+        return dtypes.Int8()
+    if ibis_dtype.is_uint64():
+        return dtypes.UInt64()
+    if ibis_dtype.is_uint32():
+        return dtypes.UInt32()
+    if ibis_dtype.is_uint16():
+        return dtypes.UInt16()
+    if ibis_dtype.is_uint8():
+        return dtypes.UInt8()
+    if ibis_dtype.is_boolean():
+        return dtypes.Boolean()
+    if ibis_dtype.is_float64():
+        return dtypes.Float64()
+    if ibis_dtype.is_float32():
+        return dtypes.Float32()
+    if ibis_dtype.is_string():
+        return dtypes.String()
+    if ibis_dtype.is_date():
+        return dtypes.Date()
+    if ibis_dtype.is_timestamp():
+        # NOTE(review): `Datetime()` is built without arguments, so nothing from the ibis
+        # timestamp (e.g. its timezone) is passed along here.
+        return dtypes.Datetime()
+    if is_interval(ibis_dtype):
+        _time_unit = ibis_dtype.unit.value
+        if _time_unit not in {"ns", "us", "ms", "s"}:  # pragma: no cover
+            msg = f"Unsupported interval unit: {_time_unit}"
+            raise NotImplementedError(msg)
+        return dtypes.Duration(_time_unit)
+    if is_array(ibis_dtype):
+        # A truthy `length` maps to a fixed-size Array, otherwise to a List.
+        if ibis_dtype.length:
+            return dtypes.Array(
+                native_to_narwhals_dtype(ibis_dtype.value_type, version),
+                ibis_dtype.length,
+            )
+        else:
+            return dtypes.List(native_to_narwhals_dtype(ibis_dtype.value_type, version))
+    if is_struct(ibis_dtype):
+        return dtypes.Struct(
+            [
+                dtypes.Field(name, native_to_narwhals_dtype(dtype, version))
+                for name, dtype in ibis_dtype.items()
+            ]
+        )
+    if ibis_dtype.is_decimal():  # pragma: no cover
+        return dtypes.Decimal()
+    if ibis_dtype.is_time():
+        return dtypes.Time()
+    if ibis_dtype.is_binary():
+        return dtypes.Binary()
+    return dtypes.Unknown()  # pragma: no cover
+
+
+# The `is_*` helpers below wrap the ibis predicates as `TypeIs` guards.
+def is_interval(obj: IbisDataType) -> TypeIs[ibis_dtypes.Interval]:
+    return obj.is_interval()
+
+
+def is_array(obj: IbisDataType) -> TypeIs[ibis_dtypes.Array[Any]]:
+    return obj.is_array()
+
+
+def is_struct(obj: IbisDataType) -> TypeIs[ibis_dtypes.Struct]:
+    return obj.is_struct()
+
+
+def is_floating(obj: IbisDataType) -> TypeIs[ibis_dtypes.Floating]:
+    return obj.is_floating()
+
+
+def narwhals_to_native_dtype(  # noqa: C901, PLR0912
+    dtype: IntoDType, version: Version
+) -> IbisDataType:
+    """Map a Narwhals dtype (class or instance) to the corresponding ibis dtype.
+
+    Raises:
+        NotImplementedError: for Int128, UInt128, Categorical and Enum.
+        AssertionError: if `dtype` matches none of the handled dtypes.
+    """
+    dtypes = version.dtypes
+
+    if isinstance_or_issubclass(dtype, dtypes.Decimal):  # pragma: no cover
+        return ibis_dtypes.Decimal()
+    if isinstance_or_issubclass(dtype, dtypes.Float64):
+        return ibis_dtypes.Float64()
+    if isinstance_or_issubclass(dtype, dtypes.Float32):
+        return ibis_dtypes.Float32()
+    if isinstance_or_issubclass(dtype, dtypes.Int128):  # pragma: no cover
+        msg = "Int128 not supported by Ibis"
+        raise NotImplementedError(msg)
+    if isinstance_or_issubclass(dtype, dtypes.Int64):
+        return ibis_dtypes.Int64()
+    if isinstance_or_issubclass(dtype, dtypes.Int32):
+        return ibis_dtypes.Int32()
+    if isinstance_or_issubclass(dtype, dtypes.Int16):
+        return ibis_dtypes.Int16()
+    if isinstance_or_issubclass(dtype, dtypes.Int8):
+        return ibis_dtypes.Int8()
+    if isinstance_or_issubclass(dtype, dtypes.UInt128):  # pragma: no cover
+        msg = "UInt128 not supported by Ibis"
+        raise NotImplementedError(msg)
+    if isinstance_or_issubclass(dtype, dtypes.UInt64):
+        return ibis_dtypes.UInt64()
+    if isinstance_or_issubclass(dtype, dtypes.UInt32):
+        return ibis_dtypes.UInt32()
+    if isinstance_or_issubclass(dtype, dtypes.UInt16):
+        return ibis_dtypes.UInt16()
+    if isinstance_or_issubclass(dtype, dtypes.UInt8):
+        return ibis_dtypes.UInt8()
+    if isinstance_or_issubclass(dtype, dtypes.String):
+        return ibis_dtypes.String()
+    if isinstance_or_issubclass(dtype, dtypes.Boolean):
+        return ibis_dtypes.Boolean()
+    if isinstance_or_issubclass(dtype, dtypes.Categorical):
+        msg = "Categorical not supported by Ibis"
+        raise NotImplementedError(msg)
+    if isinstance_or_issubclass(dtype, dtypes.Datetime):
+        # NOTE(review): `Timestamp()` is built without arguments, so no attribute of the
+        # Narwhals Datetime is forwarded here.
+        return ibis_dtypes.Timestamp()
+    if isinstance_or_issubclass(dtype, dtypes.Duration):
+        return ibis_dtypes.Interval(unit=dtype.time_unit)  # pyright: ignore[reportArgumentType]
+    if isinstance_or_issubclass(dtype, dtypes.Date):
+        return ibis_dtypes.Date()
+    if isinstance_or_issubclass(dtype, dtypes.Time):
+        return ibis_dtypes.Time()
+    # Nested dtypes recurse on their inner dtype(s).
+    if isinstance_or_issubclass(dtype, dtypes.List):
+        inner = narwhals_to_native_dtype(dtype.inner, version)
+        return ibis_dtypes.Array(value_type=inner)
+    if isinstance_or_issubclass(dtype, dtypes.Struct):
+        fields = [
+            (field.name, narwhals_to_native_dtype(field.dtype, version))
+            for field in dtype.fields
+        ]
+        return ibis_dtypes.Struct.from_tuples(fields)
+    if isinstance_or_issubclass(dtype, dtypes.Array):
+        inner = narwhals_to_native_dtype(dtype.inner, version)
+        return ibis_dtypes.Array(value_type=inner, length=dtype.size)
+    if isinstance_or_issubclass(dtype, dtypes.Binary):
+        return ibis_dtypes.Binary()
+    if isinstance_or_issubclass(dtype, dtypes.Enum):
+        # Ibis does not support: https://github.com/ibis-project/ibis/issues/10991
+        msg = "Enum not supported by Ibis"
+        raise NotImplementedError(msg)
+    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
+    raise AssertionError(msg)
--
cgit v1.2.3-70-g09d2