aboutsummaryrefslogtreecommitdiff
path: root/venv/lib/python3.8/site-packages/narwhals/expr.py
diff options
context:
space:
mode:
authorsotech117 <michael_foiani@brown.edu>2025-07-31 17:27:24 -0400
committersotech117 <michael_foiani@brown.edu>2025-07-31 17:27:24 -0400
commit5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/narwhals/expr.py
parentb832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/narwhals/expr.py')
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr.py2544
1 files changed, 2544 insertions, 0 deletions
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr.py b/venv/lib/python3.8/site-packages/narwhals/expr.py
new file mode 100644
index 0000000..fcd48f1
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr.py
@@ -0,0 +1,2544 @@
+from __future__ import annotations
+
+import math
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, Sequence
+
+from narwhals._expression_parsing import (
+ ExprMetadata,
+ apply_n_ary_operation,
+ combine_metadata,
+ extract_compliant,
+)
+from narwhals._utils import (
+ _validate_rolling_arguments,
+ ensure_type,
+ flatten,
+ issue_deprecation_warning,
+)
+from narwhals.dtypes import _validate_dtype
+from narwhals.exceptions import InvalidOperationError
+from narwhals.expr_cat import ExprCatNamespace
+from narwhals.expr_dt import ExprDateTimeNamespace
+from narwhals.expr_list import ExprListNamespace
+from narwhals.expr_name import ExprNameNamespace
+from narwhals.expr_str import ExprStringNamespace
+from narwhals.expr_struct import ExprStructNamespace
+from narwhals.translate import to_native
+
+if TYPE_CHECKING:
+ from typing import TypeVar
+
+ from typing_extensions import Concatenate, ParamSpec, Self, TypeAlias
+
+ from narwhals._compliant import CompliantExpr, CompliantNamespace
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ IntoDType,
+ IntoExpr,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ TemporalLiteral,
+ )
+
+ PS = ParamSpec("PS")
+ R = TypeVar("R")
+ _ToCompliant: TypeAlias = Callable[
+ [CompliantNamespace[Any, Any]], CompliantExpr[Any, Any]
+ ]
+
+
+class Expr:
+ def __init__(self, to_compliant_expr: _ToCompliant, metadata: ExprMetadata) -> None:
+ # callable from CompliantNamespace to CompliantExpr
+ def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
+ result = to_compliant_expr(plx)
+ result._metadata = self._metadata
+ return result
+
+ self._to_compliant_expr: _ToCompliant = func
+ self._metadata = metadata
+
+ def _with_elementwise_op(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_elementwise_op())
+
+ def _with_aggregation(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_aggregation())
+
+ def _with_orderable_aggregation(
+ self, to_compliant_expr: Callable[[Any], Any]
+ ) -> Self:
+ return self.__class__(
+ to_compliant_expr, self._metadata.with_orderable_aggregation()
+ )
+
+ def _with_orderable_window(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_orderable_window())
+
+ def _with_unorderable_window(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_unorderable_window())
+
+ def _with_filtration(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_filtration())
+
+ def _with_orderable_filtration(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(
+ to_compliant_expr, self._metadata.with_orderable_filtration()
+ )
+
+ def __repr__(self) -> str:
+ return f"Narwhals Expr\nmetadata: {self._metadata}\n"
+
+ def _taxicab_norm(self) -> Self:
+ # This is just used to test out the stable api feature in a realistic-ish way.
+ # It's not intended to be used.
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).abs().sum()
+ )
+
+ # --- convert ---
+ def alias(self, name: str) -> Self:
+ """Rename the expression.
+
+ Arguments:
+ name: The new name.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select((nw.col("b") + 10).alias("c"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | c |
+ | 0 14 |
+ | 1 15 |
+ └──────────────────┘
+ """
+ # Don't use `_with_elementwise_op` so that `_metadata.last_node` is preserved.
+ return self.__class__(
+ lambda plx: self._to_compliant_expr(plx).alias(name), self._metadata
+ )
+
+ def pipe(
+ self,
+ function: Callable[Concatenate[Self, PS], R],
+ *args: PS.args,
+ **kwargs: PS.kwargs,
+ ) -> R:
+ """Pipe function call.
+
+ Arguments:
+ function: Function to apply.
+ args: Positional arguments to pass to function.
+ kwargs: Keyword arguments to pass to function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a a_piped |
+ | 0 1 2 |
+ | 1 2 3 |
+ | 2 3 4 |
+ | 3 4 5 |
+ └──────────────────┘
+ """
+ return function(self, *args, **kwargs)
+
+ def cast(self, dtype: IntoDType) -> Self:
+ """Redefine an object's data type.
+
+ Arguments:
+ dtype: Data type that the object will be cast into.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | foo bar |
+ | 0 1.0 6 |
+ | 1 2.0 7 |
+ | 2 3.0 8 |
+ └──────────────────┘
+ """
+ _validate_dtype(dtype)
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).cast(dtype)
+ )
+
+ # --- binary ---
+ def __eq__(self, other: Self | Any) -> Self: # type: ignore[override]
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x == y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ne__(self, other: Self | Any) -> Self: # type: ignore[override]
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x != y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __and__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x & y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rand__(self, other: Any) -> Self:
+ return (self & other).alias("literal") # type: ignore[no-any-return]
+
+ def __or__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x | y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ror__(self, other: Any) -> Self:
+ return (self | other).alias("literal") # type: ignore[no-any-return]
+
+ def __add__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x + y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __radd__(self, other: Any) -> Self:
+ return (self + other).alias("literal") # type: ignore[no-any-return]
+
+ def __sub__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x - y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rsub__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rsub__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __truediv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x / y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rtruediv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rtruediv__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __mul__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x * y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rmul__(self, other: Any) -> Self:
+ return (self * other).alias("literal") # type: ignore[no-any-return]
+
+ def __le__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x <= y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __lt__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x < y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __gt__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x > y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ge__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x >= y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __pow__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x**y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rpow__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rpow__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __floordiv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x // y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rfloordiv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rfloordiv__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __mod__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x % y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rmod__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rmod__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ # --- unary ---
+ def __invert__(self) -> Self:
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).__invert__()
+ )
+
+ def any(self) -> Self:
+ """Return whether any of the values in the column are `True`.
+
+ If there are no non-null elements, the result is `False`.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").any())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 True True |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).any())
+
+ def all(self) -> Self:
+ """Return whether all values in the column are `True`.
+
+ If there are no non-null elements, the result is `True`.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").all())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 False True |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).all())
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None = None,
+ span: float | None = None,
+ half_life: float | None = None,
+ alpha: float | None = None,
+ adjust: bool = True,
+ min_samples: int = 1,
+ ignore_nulls: bool = False,
+ ) -> Self:
+ r"""Compute exponentially-weighted moving average.
+
+ Arguments:
+ com: Specify decay in terms of center of mass, $\gamma$, with <br> $\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$
+ span: Specify decay in terms of span, $\theta$, with <br> $\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$
+ half_life: Specify decay in terms of half-life, $\tau$, with <br> $\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$
+ alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$.
+ adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
+
+ - When `adjust=True` (the default) the EW function is calculated
+ using weights $w_i = (1 - \alpha)^i$
+ - When `adjust=False` the EW function is calculated recursively by
+ $$
+ y_0=x_0
+ $$
+ $$
+ y_t = (1 - \alpha)y_{t - 1} + \alpha x_t
+ $$
+ min_samples: Minimum number of observations in window required to have a value, (otherwise result is null).
+ ignore_nulls: Ignore missing values when calculating weights.
+
+ - When `ignore_nulls=False` (default), weights are based on absolute
+ positions.
+ For example, the weights of $x_0$ and $x_2$ used in
+ calculating the final weighted average of $[x_0, None, x_2]$ are
+ $(1-\alpha)^2$ and $1$ if `adjust=True`, and
+ $(1-\alpha)^2$ and $\alpha$ if `adjust=False`.
+ - When `ignore_nulls=True`, weights are based
+ on relative positions. For example, the weights of
+ $x_0$ and $x_2$ used in calculating the final weighted
+ average of $[x_0, None, x_2]$ are
+ $1-\alpha$ and $1$ if `adjust=True`,
+ and $1-\alpha$ and $\alpha$ if `adjust=False`.
+
+ Returns:
+ Expr
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoFrameT
+ >>>
+ >>> data = {"a": [1, 2, 3]}
+ >>> df_pd = pd.DataFrame(data)
+ >>> df_pl = pl.DataFrame(data)
+
+ We define a library agnostic function:
+
+ >>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
+ ... df = nw.from_native(df_native)
+ ... return df.select(
+ ... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
+ ... ).to_native()
+
+ We can then pass either pandas or Polars to `agnostic_ewm_mean`:
+
+ >>> agnostic_ewm_mean(df_pd)
+ a
+ 0 1.000000
+ 1 1.666667
+ 2 2.428571
+
+ >>> agnostic_ewm_mean(df_pl) # doctest: +NORMALIZE_WHITESPACE
+ shape: (3, 1)
+ ┌──────────┐
+ │ a │
+ │ --- │
+ │ f64 │
+ ╞══════════╡
+ │ 1.0 │
+ │ 1.666667 │
+ │ 2.428571 │
+ └──────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).ewm_mean(
+ com=com,
+ span=span,
+ half_life=half_life,
+ alpha=alpha,
+ adjust=adjust,
+ min_samples=min_samples,
+ ignore_nulls=ignore_nulls,
+ )
+ )
+
+ def mean(self) -> Self:
+ """Get mean value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").mean())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 0.0 4.0 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).mean())
+
+ def median(self) -> Self:
+ """Get median value.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").median())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3.0 4.0 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).median())
+
+ def std(self, *, ddof: int = 1) -> Self:
+ """Get standard deviation.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements. By default ddof is 1.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").std(ddof=0))
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a b|
+ |0 17.79513 1.265789|
+ └─────────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).std(ddof=ddof)
+ )
+
+ def var(self, *, ddof: int = 1) -> Self:
+ """Get variance.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements. By default ddof is 1.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").var(ddof=0))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a b|
+ |0 316.666667 1.602222|
+ └───────────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).var(ddof=ddof)
+ )
+
+ def map_batches(
+ self,
+ function: Callable[[Any], CompliantExpr[Any, Any]],
+ return_dtype: DType | None = None,
+ ) -> Self:
+ """Apply a custom python function to a whole Series or sequence of Series.
+
+ The output of this custom function is presumed to be either a Series,
+ or a NumPy array (in which case it will be automatically converted into
+ a Series).
+
+ Arguments:
+ function: Function to apply to Series.
+ return_dtype: Dtype of the output Series.
+ If not set, the dtype will be inferred based on the first non-null value
+ that is returned by the function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a", "b")
+ ... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
+ ... .name.suffix("_mapped")
+ ... )
+ ┌───────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------|
+ | a b a_mapped b_mapped|
+ |0 1 4 2.0 5.0|
+ |1 2 5 3.0 6.0|
+ |2 3 6 4.0 7.0|
+ └───────────────────────────┘
+ """
+ # safest assumptions
+ return self._with_orderable_filtration(
+ lambda plx: self._to_compliant_expr(plx).map_batches(
+ function=function, return_dtype=return_dtype
+ )
+ )
+
+ def skew(self) -> Self:
+ """Calculate the sample skewness of a column.
+
+ Returns:
+ An expression representing the sample skewness of the column.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").skew())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 0.0 1.472427 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).skew())
+
+ def sum(self) -> Expr:
+ """Return the sum value.
+
+ If there are no non-null elements, the result is zero.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)")
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").sum())
+ ┌───────────────────┐
+ |Narwhals LazyFrame |
+ |-------------------|
+ |┌────────┬────────┐|
+ |│ a │ b │|
+ |│ int128 │ int128 │|
+ |├────────┼────────┤|
+ |│ 15 │ 150 │|
+ |└────────┴────────┘|
+ └───────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).sum())
+
+ def min(self) -> Self:
+ """Returns the minimum value(s) from a column(s).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.min("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 3 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).min())
+
+ def max(self) -> Self:
+ """Returns the maximum value(s) from a column(s).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.max("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 20 100 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).max())
+
+ def arg_min(self) -> Self:
+ """Returns the index of the minimum value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").arg_min().name.suffix("_arg_min"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a_arg_min b_arg_min|
+ |0 0 1|
+ └───────────────────────┘
+ """
+ return self._with_orderable_aggregation(
+ lambda plx: self._to_compliant_expr(plx).arg_min()
+ )
+
+ def arg_max(self) -> Self:
+ """Returns the index of the maximum value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").arg_max().name.suffix("_arg_max"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a_arg_max b_arg_max|
+ |0 1 0|
+ └───────────────────────┘
+ """
+ return self._with_orderable_aggregation(
+ lambda plx: self._to_compliant_expr(plx).arg_max()
+ )
+
+ def count(self) -> Self:
+ """Returns the number of non-null elements in the column.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.all().count())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3 2 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).count())
+
+ def n_unique(self) -> Self:
+ """Returns count of unique values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").n_unique())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 5 3 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).n_unique())
+
+ def unique(self) -> Self:
+ """Return unique values of this expression.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").unique().sum())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 9 12 |
+ └──────────────────┘
+ """
+ return self._with_filtration(lambda plx: self._to_compliant_expr(plx).unique())
+
+ def abs(self) -> Self:
+ """Return absolute value of each element.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, -2], "b": [-3, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a", "b").abs().name.suffix("_abs"))
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a b a_abs b_abs|
+ |0 1 -3 1 3|
+ |1 -2 4 2 4|
+ └─────────────────────┘
+ """
+ return self._with_elementwise_op(lambda plx: self._to_compliant_expr(plx).abs())
+
+ def cum_sum(self, *, reverse: bool = False) -> Self:
+ """Return cumulative sum.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_cum_sum=nw.col("a").cum_sum())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b a_cum_sum|
+ |0 1 2 1|
+ |1 1 4 2|
+ |2 3 4 5|
+ |3 5 6 10|
+ |4 5 6 15|
+ └──────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_sum(reverse=reverse)
+ )
+
+ def diff(self) -> Self:
+ """Returns the difference between each element and the previous one.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas may change the dtype here, for example when introducing missing
+ values in an integer column. To ensure, that the dtype doesn't change,
+ you may want to use `fill_null` and `cast`. For example, to calculate
+ the diff and fill missing values with `0` in a Int64 column, you could
+ do:
+
+ nw.col("a").diff().fill_null(0).cast(nw.Int64)
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_diff=nw.col("a").diff())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (5, 2) |
+ | ┌─────┬────────┐ |
+ | │ a ┆ a_diff │ |
+ | │ --- ┆ --- │ |
+ | │ i64 ┆ i64 │ |
+ | ╞═════╪════════╡ |
+ | │ 1 ┆ null │ |
+ | │ 1 ┆ 0 │ |
+ | │ 3 ┆ 2 │ |
+ | │ 5 ┆ 2 │ |
+ | │ 5 ┆ 0 │ |
+ | └─────┴────────┘ |
+ └──────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).diff()
+ )
+
+ def shift(self, n: int) -> Self:
+ """Shift values by `n` positions.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ n: Number of positions to shift values by.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas may change the dtype here, for example when introducing missing
+ values in an integer column. To ensure, that the dtype doesn't change,
+ you may want to use `fill_null` and `cast`. For example, to shift
+ and fill missing values with `0` in a Int64 column, you could
+ do:
+
+ nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_shift=nw.col("a").shift(n=1))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ |shape: (5, 2) |
+ |┌─────┬─────────┐ |
+ |│ a ┆ a_shift │ |
+ |│ --- ┆ --- │ |
+ |│ i64 ┆ i64 │ |
+ |╞═════╪═════════╡ |
+ |│ 1 ┆ null │ |
+ |│ 1 ┆ 1 │ |
+ |│ 3 ┆ 1 │ |
+ |│ 5 ┆ 3 │ |
+ |│ 5 ┆ 5 │ |
+ |└─────┴─────────┘ |
+ └──────────────────┘
+ """
+ ensure_type(n, int, param_name="n")
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).shift(n)
+ )
+
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any] | None = None,
+ *,
+ return_dtype: IntoDType | None = None,
+ ) -> Self:
+ """Replace all values by different values.
+
+ This function must replace all non-null input values (else it raises an error).
+
+ Arguments:
+ old: Sequence of values to replace. It also accepts a mapping of values to
+ their replacement as syntactic sugar for
+ `replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
+ new: Sequence of values to replace by. Length must match the length of `old`.
+ return_dtype: The data type of the resulting expression. If set to `None`
+ (default), the data type is determined automatically based on the other
+ inputs.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [3, 0, 1, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... b=nw.col("a").replace_strict(
+ ... [0, 1, 2, 3],
+ ... ["zero", "one", "two", "three"],
+ ... return_dtype=nw.String,
+ ... )
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3 three |
+ | 1 0 zero |
+ | 2 1 one |
+ | 3 2 two |
+ └──────────────────┘
+ """
+ if new is None:
+ if not isinstance(old, Mapping):
+ msg = "`new` argument is required if `old` argument is not a Mapping type"
+ raise TypeError(msg)
+
+ new = list(old.values())
+ old = list(old.keys())
+
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).replace_strict(
+ old, new, return_dtype=return_dtype
+ )
+ )
+
+ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
+ """Sort this column. Place null values first.
+
+ Warning:
+ `Expr.sort` is deprecated and will be removed in a future version.
+ Hint: instead of `df.select(nw.col('a').sort())`, use
+ `df.select(nw.col('a')).sort()` instead.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ descending: Sort in descending order.
+ nulls_last: Place null values last instead of first.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.sort` is deprecated and will be removed in a future version.\n\n"
+ "Hint: instead of `df.select(nw.col('a').sort())`, use `df.select(nw.col('a')).sort()`.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).sort(
+ descending=descending, nulls_last=nulls_last
+ )
+ )
+
+ # --- transform ---
+ def is_between(
+ self,
+ lower_bound: Any | IntoExpr,
+ upper_bound: Any | IntoExpr,
+ closed: ClosedInterval = "both",
+ ) -> Self:
+ """Check if this expression is between the given lower and upper bounds.
+
+ Arguments:
+ lower_bound: Lower bound value. String literals are interpreted as column names.
+ upper_bound: Upper bound value. String literals are interpreted as column names.
+ closed: Define which sides of the interval are closed (inclusive).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(b=nw.col("a").is_between(2, 4, "right"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 False |
+ | 1 2 False |
+ | 2 3 True |
+ | 3 4 True |
+ | 4 5 False |
+ └──────────────────┘
+ """
+
+ def func(
+ compliant_expr: CompliantExpr[Any, Any],
+ lb: CompliantExpr[Any, Any],
+ ub: CompliantExpr[Any, Any],
+ ) -> CompliantExpr[Any, Any]:
+ if closed == "left":
+ return (compliant_expr >= lb) & (compliant_expr < ub)
+ elif closed == "right":
+ return (compliant_expr > lb) & (compliant_expr <= ub)
+ elif closed == "none":
+ return (compliant_expr > lb) & (compliant_expr < ub)
+ return (compliant_expr >= lb) & (compliant_expr <= ub)
+
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, func, self, lower_bound, upper_bound, str_as_lit=False
+ ),
+ combine_metadata(
+ self,
+ lower_bound,
+ upper_bound,
+ str_as_lit=False,
+ allow_multi_output=False,
+ to_single_output=False,
+ ),
+ )
+
+ def is_in(self, other: Any) -> Self:
+ """Check if elements of this expression are present in the other iterable.
+
+ Arguments:
+ other: iterable
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 9, 10]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(b=nw.col("a").is_in([1, 2]))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 True |
+ | 1 2 True |
+ | 2 9 False |
+ | 3 10 False |
+ └──────────────────┘
+ """
+ if isinstance(other, Iterable) and not isinstance(other, (str, bytes)):
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).is_in(
+ to_native(other, pass_through=True)
+ )
+ )
+ else:
+ msg = "Narwhals `is_in` doesn't accept expressions as an argument, as opposed to Polars. You should provide an iterable instead."
+ raise NotImplementedError(msg)
+
+ def filter(self, *predicates: Any) -> Self:
+ """Filters elements based on a condition, returning a new expression.
+
+ Arguments:
+ predicates: Conditions to filter by (which get ANDed together).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(
+ ... nw.col("a").filter(nw.col("a") > 4),
+ ... nw.col("b").filter(nw.col("b") < 13),
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 3 5 10 |
+ | 4 6 11 |
+ | 5 7 12 |
+ └──────────────────┘
+ """
+ flat_predicates = flatten(predicates)
+ metadata = combine_metadata(
+ self,
+ *flat_predicates,
+ str_as_lit=False,
+ allow_multi_output=True,
+ to_single_output=False,
+ ).with_filtration()
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx,
+ lambda *exprs: exprs[0].filter(*exprs[1:]),
+ self,
+ *flat_predicates,
+ str_as_lit=False,
+ ),
+ metadata,
+ )
+
+ def is_null(self) -> Self:
+ """Returns a boolean Series indicating which values are null.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
+ ... )
+ ┌──────────────────────────────────────────┐
+ | Narwhals LazyFrame |
+ |------------------------------------------|
+ |┌───────┬────────┬───────────┬───────────┐|
+ |│ a │ b │ a_is_null │ b_is_null │|
+ |│ int32 │ double │ boolean │ boolean │|
+ |├───────┼────────┼───────────┼───────────┤|
+ |│ NULL │ nan │ true │ false │|
+ |│ 2 │ 2.0 │ false │ false │|
+ |└───────┴────────┴───────────┴───────────┘|
+ └──────────────────────────────────────────┘
+ """
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).is_null()
+ )
+
+ def is_nan(self) -> Self:
+ """Indicate which values are NaN.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()
+ ... )
+ ┌────────────────────────────────────────┐
+ | Narwhals LazyFrame |
+ |----------------------------------------|
+ |┌───────┬────────┬──────────┬──────────┐|
+ |│ a │ b │ a_is_nan │ b_is_nan │|
+ |│ int32 │ double │ boolean │ boolean │|
+ |├───────┼────────┼──────────┼──────────┤|
+ |│ NULL │ nan │ NULL │ true │|
+ |│ 2 │ 2.0 │ false │ false │|
+ |└───────┴────────┴──────────┴──────────┘|
+ └────────────────────────────────────────┘
+ """
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).is_nan()
+ )
+
+ def arg_true(self) -> Self:
+ """Find elements where boolean expression is True.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.arg_true` is deprecated and will be removed in a future version.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_filtration(lambda plx: self._to_compliant_expr(plx).arg_true())
+
+ def fill_null(
+ self,
+ value: Expr | NonNestedLiteral = None,
+ strategy: FillNullStrategy | None = None,
+ limit: int | None = None,
+ ) -> Self:
+ """Fill null values with given value.
+
+ Arguments:
+ value: Value or expression used to fill null values.
+ strategy: Strategy used to fill null values.
+ limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {
+ ... "a": [2, None, None, 3],
+ ... "b": [2.0, float("nan"), float("nan"), 3.0],
+ ... "c": [1, 2, 3, 4],
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a", "b").fill_null(0).name.suffix("_filled"),
+ ... nw.col("a").fill_null(nw.col("c")).name.suffix("_filled_with_c"),
+ ... )
+ ┌────────────────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------------------------------|
+ |shape: (4, 6) |
+ |┌──────┬─────┬─────┬──────────┬──────────┬─────────────────┐|
+ |│ a ┆ b ┆ c ┆ a_filled ┆ b_filled ┆ a_filled_with_c │|
+ |│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ f64 ┆ i64 ┆ i64 ┆ f64 ┆ i64 │|
+ |╞══════╪═════╪═════╪══════════╪══════════╪═════════════════╡|
+ |│ 2 ┆ 2.0 ┆ 1 ┆ 2 ┆ 2.0 ┆ 2 │|
+ |│ null ┆ NaN ┆ 2 ┆ 0 ┆ NaN ┆ 2 │|
+ |│ null ┆ NaN ┆ 3 ┆ 0 ┆ NaN ┆ 3 │|
+ |│ 3 ┆ 3.0 ┆ 4 ┆ 3 ┆ 3.0 ┆ 3 │|
+ |└──────┴─────┴─────┴──────────┴──────────┴─────────────────┘|
+ └────────────────────────────────────────────────────────────┘
+
+ Using a strategy:
+
+ >>> df.select(
+ ... nw.col("a", "b"),
+ ... nw.col("a", "b")
+ ... .fill_null(strategy="forward", limit=1)
+ ... .name.suffix("_nulls_forward_filled"),
+ ... )
+ ┌────────────────────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------------------------------------------------|
+ |shape: (4, 4) |
+ |┌──────┬─────┬────────────────────────┬────────────────────────┐|
+ |│ a ┆ b ┆ a_nulls_forward_filled ┆ b_nulls_forward_filled │|
+ |│ --- ┆ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ f64 ┆ i64 ┆ f64 │|
+ |╞══════╪═════╪════════════════════════╪════════════════════════╡|
+ |│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │|
+ |│ null ┆ NaN ┆ 2 ┆ NaN │|
+ |│ null ┆ NaN ┆ null ┆ NaN │|
+ |│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │|
+ |└──────┴─────┴────────────────────────┴────────────────────────┘|
+ └────────────────────────────────────────────────────────────────┘
+ """
+ if value is not None and strategy is not None:
+ msg = "cannot specify both `value` and `strategy`"
+ raise ValueError(msg)
+ if value is None and strategy is None:
+ msg = "must specify either a fill `value` or `strategy`"
+ raise ValueError(msg)
+ if strategy is not None and strategy not in {"forward", "backward"}:
+ msg = f"strategy not supported: {strategy}"
+ raise ValueError(msg)
+
+ return self.__class__(
+ lambda plx: self._to_compliant_expr(plx).fill_null(
+ value=extract_compliant(plx, value, str_as_lit=True),
+ strategy=strategy,
+ limit=limit,
+ ),
+ self._metadata.with_orderable_window()
+ if strategy is not None
+ else self._metadata,
+ )
+
+ # --- partial reduction ---
+ def drop_nulls(self) -> Self:
+ """Drop null values.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").drop_nulls())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (5, 1) |
+ | ┌─────┐ |
+ | │ a │ |
+ | │ --- │ |
+ | │ f64 │ |
+ | ╞═════╡ |
+ | │ 2.0 │ |
+ | │ 4.0 │ |
+ | │ NaN │ |
+ | │ 3.0 │ |
+ | │ 5.0 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+ return self._with_filtration(
+ lambda plx: self._to_compliant_expr(plx).drop_nulls()
+ )
+
+ def sample(
+ self,
+ n: int | None = None,
+ *,
+ fraction: float | None = None,
+ with_replacement: bool = False,
+ seed: int | None = None,
+ ) -> Self:
+ """Sample randomly from this expression.
+
+ Warning:
+ `Expr.sample` is deprecated and will be removed in a future version.
+ Hint: instead of `df.select(nw.col('a').sample())`, use
+ `df.select(nw.col('a')).sample()` instead.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Number of items to return. Cannot be used with fraction.
+ fraction: Fraction of items to return. Cannot be used with n.
+ with_replacement: Allow values to be sampled more than once.
+ seed: Seed for the random number generator. If set to None (default), a random
+ seed is generated for each sample operation.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.sample` is deprecated and will be removed in a future version.\n\n"
+ "Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_filtration(
+ lambda plx: self._to_compliant_expr(plx).sample(
+ n, fraction=fraction, with_replacement=with_replacement, seed=seed
+ )
+ )
+
+ def over(
+ self,
+ *partition_by: str | Sequence[str],
+ order_by: str | Sequence[str] | None = None,
+ ) -> Self:
+ """Compute expressions over the given groups (optionally with given order).
+
+ Arguments:
+ partition_by: Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' `Expr.over`.
+ order_by: Column(s) to order window functions by.
+ For lazy backends, this argument is required when `over` is applied
+ to order-dependent functions, see [order-dependence](../concepts/order_dependence.md).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 4], "b": ["x", "x", "y"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
+ ┌────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------|
+ | a b a_min_per_group|
+ |0 1 x 1|
+ |1 2 x 1|
+ |2 4 y 4|
+ └────────────────────────┘
+
+ Cumulative operations are also supported, but (currently) only for
+ pandas and Polars:
+
+ >>> df.with_columns(a_cum_sum_per_group=nw.col("a").cum_sum().over("b"))
+ ┌────────────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------------|
+ | a b a_cum_sum_per_group|
+ |0 1 x 1|
+ |1 2 x 3|
+ |2 4 y 4|
+ └────────────────────────────┘
+ """
+ flat_partition_by = flatten(partition_by)
+ flat_order_by = [order_by] if isinstance(order_by, str) else (order_by or [])
+ if not flat_partition_by and not flat_order_by: # pragma: no cover
+ msg = "At least one of `partition_by` or `order_by` must be specified."
+ raise ValueError(msg)
+
+ current_meta = self._metadata
+ if flat_order_by:
+ next_meta = current_meta.with_ordered_over()
+ elif not flat_partition_by: # pragma: no cover
+ msg = "At least one of `partition_by` or `order_by` must be specified."
+ raise InvalidOperationError(msg)
+ else:
+ next_meta = current_meta.with_partitioned_over()
+
+ return self.__class__(
+ lambda plx: self._to_compliant_expr(plx).over(
+ flat_partition_by, flat_order_by
+ ),
+ next_meta,
+ )
+
+ def is_duplicated(self) -> Self:
+ r"""Return a boolean mask indicating duplicated values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.all().is_duplicated().name.suffix("_is_duplicated"))
+ ┌─────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------|
+ | a b a_is_duplicated b_is_duplicated|
+ |0 1 a True True|
+ |1 2 a False True|
+ |2 3 b False False|
+ |3 1 c True False|
+ └─────────────────────────────────────────┘
+ """
+ return ~self.is_unique()
+
+ def is_unique(self) -> Self:
+ r"""Return a boolean mask indicating unique values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.all().is_unique().name.suffix("_is_unique"))
+ ┌─────────────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------------|
+ | a b a_is_unique b_is_unique|
+ |0 1 a False False|
+ |1 2 a True False|
+ |2 3 b True True|
+ |3 1 c False True|
+ └─────────────────────────────────┘
+ """
+ return self._with_unorderable_window(
+ lambda plx: self._to_compliant_expr(plx).is_unique()
+ )
+
+ def null_count(self) -> Self:
+ r"""Count null values.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.all().null_count())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 2 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).null_count()
+ )
+
+ def is_first_distinct(self) -> Self:
+ r"""Return a boolean mask indicating the first occurrence of each distinct value.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.all().is_first_distinct().name.suffix("_is_first_distinct")
+ ... )
+ ┌─────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-------------------------------------------------|
+ | a b a_is_first_distinct b_is_first_distinct|
+ |0 1 a True True|
+ |1 2 a True False|
+ |2 3 b True True|
+ |3 1 c False True|
+ └─────────────────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).is_first_distinct()
+ )
+
+ def is_last_distinct(self) -> Self:
+ r"""Return a boolean mask indicating the last occurrence of each distinct value.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.all().is_last_distinct().name.suffix("_is_last_distinct")
+ ... )
+ ┌───────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------------|
+ | a b a_is_last_distinct b_is_last_distinct|
+ |0 1 a False False|
+ |1 2 a True True|
+ |2 3 b True True|
+ |3 1 c True True|
+ └───────────────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).is_last_distinct()
+ )
+
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> Self:
+ r"""Get quantile value.
+
+ Arguments:
+ quantile: Quantile between 0.0 and 1.0.
+ interpolation: Interpolation method.
+
+ Returns:
+ A new expression.
+
+ Note:
+ - pandas and Polars may have implementation differences for a given interpolation method.
+ - [dask](https://docs.dask.org/en/stable/generated/dask.dataframe.Series.quantile.html) has
+ its own method to approximate quantile and it doesn't implement 'nearest', 'higher',
+ 'lower', 'midpoint' as interpolation method - use 'linear' which is closest to the
+ native 'dask' - method.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": list(range(50)), "b": list(range(50, 100))}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 24.5 74.5 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).quantile(quantile, interpolation)
+ )
+
+ def head(self, n: int = 10) -> Self:
+ r"""Get the first `n` rows.
+
+ Warning:
+ `Expr.head` is deprecated and will be removed in a future version.
+ Hint: instead of `df.select(nw.col('a').head())`, use
+ `df.select(nw.col('a')).head()` instead.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Number of rows to return.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.head` is deprecated and will be removed in a future version.\n\n"
+ "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_orderable_filtration(
+ lambda plx: self._to_compliant_expr(plx).head(n)
+ )
+
+ def tail(self, n: int = 10) -> Self:
+ r"""Get the last `n` rows.
+
+ Warning:
+ `Expr.tail` is deprecated and will be removed in a future version.
+ Hint: instead of `df.select(nw.col('a').tail())`, use
+ `df.select(nw.col('a')).tail()` instead.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Number of rows to return.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.tail` is deprecated and will be removed in a future version.\n\n"
+ "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_filtration(lambda plx: self._to_compliant_expr(plx).tail(n))
+
+ def round(self, decimals: int = 0) -> Self:
+ r"""Round underlying floating point data by `decimals` digits.
+
+ Arguments:
+ decimals: Number of decimals to round by.
+
+ Returns:
+ A new expression.
+
+
+ Notes:
+ For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+
+ pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and
+ 4.5 to 4.0, etc..).
+
+ Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.12345, 2.56789, 3.901234]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_rounded=nw.col("a").round(1))
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ | a a_rounded|
+ |0 1.123450 1.1|
+ |1 2.567890 2.6|
+ |2 3.901234 3.9|
+ └──────────────────────┘
+ """
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).round(decimals)
+ )
+
+ def len(self) -> Self:
+ r"""Return the number of elements in the column.
+
+ Null values count towards the total.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": ["x", "y", "z"], "b": [1, 2, 1]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(
+ ... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
+ ... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a1 a2 |
+ | 0 2 1 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).len())
+
+ def gather_every(self, n: int, offset: int = 0) -> Self:
+ r"""Take every nth value in the Series and return as new Series.
+
+ Warning:
+ `Expr.gather_every` is deprecated and will be removed in a future version.
+ Hint: instead of `df.select(nw.col('a').gather_every())`, use
+ `df.select(nw.col('a')).gather_every()` instead.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Gather every *n*-th row.
+ offset: Starting index.
+
+ Returns:
+ A new expression.
+ """
+ msg = (
+ "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n"
+ "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.23.0")
+ return self._with_filtration(
+ lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
+ )
+
    def clip(
        self,
        lower_bound: IntoExpr | NumericLiteral | TemporalLiteral | None = None,
        upper_bound: IntoExpr | NumericLiteral | TemporalLiteral | None = None,
    ) -> Self:
        r"""Clip values in the Series.

        Arguments:
            lower_bound: Lower bound value. String literals are treated as column names.
            upper_bound: Upper bound value. String literals are treated as column names.

        Returns:
            A new expression.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.with_columns(a_clipped=nw.col("a").clip(-1, 3))
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |    a  a_clipped  |
            | 0  1          1  |
            | 1  2          2  |
            | 2  3          3  |
            └──────────────────┘
        """
        # Both bounds are always forwarded to `apply_n_ary_operation`, even
        # when `None`, so the inner callable always receives three compliant
        # exprs: (self, lower, upper). The `is not None` checks on the
        # *original* Python-level bounds then decide which compliant bounds
        # are actually handed to `clip` — presumably the compliant value at
        # that position is meaningless when the bound was `None`.
        return self.__class__(
            lambda plx: apply_n_ary_operation(
                plx,
                lambda *exprs: exprs[0].clip(
                    exprs[1] if lower_bound is not None else None,
                    exprs[2] if upper_bound is not None else None,
                ),
                self,
                lower_bound,
                upper_bound,
                str_as_lit=False,
            ),
            # Metadata mirrors the operands used above; bounds may be
            # expressions (e.g. column references), hence `str_as_lit=False`.
            combine_metadata(
                self,
                lower_bound,
                upper_bound,
                str_as_lit=False,
                allow_multi_output=False,
                to_single_output=False,
            ),
        )
+
+ def mode(self) -> Self:
+ r"""Compute the most occurring value(s).
+
+ Can return multiple values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").mode()).sort("a")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a |
+ | 0 1 |
+ └──────────────────┘
+ """
+ return self._with_filtration(lambda plx: self._to_compliant_expr(plx).mode())
+
+ def is_finite(self) -> Self:
+ """Returns boolean values indicating which original values are finite.
+
+ Warning:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+ `is_finite` will return False for NaN and Null's in the Dask and
+ pandas non-nullable backend, while for Polars, PyArrow and pandas
+ nullable backends null values are kept as such.
+
+ Returns:
+ Expression of `Boolean` data type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_is_finite=nw.col("a").is_finite())
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ |shape: (4, 2) |
+ |┌──────┬─────────────┐|
+ |│ a ┆ a_is_finite │|
+ |│ --- ┆ --- │|
+ |│ f64 ┆ bool │|
+ |╞══════╪═════════════╡|
+ |│ NaN ┆ false │|
+ |│ inf ┆ false │|
+ |│ 2.0 ┆ true │|
+ |│ null ┆ null │|
+ |└──────┴─────────────┘|
+ └──────────────────────┘
+ """
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).is_finite()
+ )
+
+ def cum_count(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative count of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_count().alias("a_cum_count"),
+ ... nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"),
+ ... )
+ ┌─────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------|
+ | a a_cum_count a_cum_count_reverse|
+ |0 x 1 3|
+ |1 k 2 2|
+ |2 None 2 1|
+ |3 d 3 1|
+ └─────────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse)
+ )
+
+ def cum_min(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative min of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [3, 1, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_min().alias("a_cum_min"),
+ ... nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"),
+ ... )
+ ┌────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------|
+ | a a_cum_min a_cum_min_reverse|
+ |0 3.0 3.0 1.0|
+ |1 1.0 1.0 1.0|
+ |2 NaN NaN NaN|
+ |3 2.0 1.0 2.0|
+ └────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse)
+ )
+
+ def cum_max(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative max of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_max().alias("a_cum_max"),
+ ... nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"),
+ ... )
+ ┌────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------|
+ | a a_cum_max a_cum_max_reverse|
+ |0 1.0 1.0 3.0|
+ |1 3.0 3.0 3.0|
+ |2 NaN NaN NaN|
+ |3 2.0 3.0 2.0|
+ └────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse)
+ )
+
+ def cum_prod(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative product of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_prod().alias("a_cum_prod"),
+ ... nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"),
+ ... )
+ ┌──────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------------------------|
+ | a a_cum_prod a_cum_prod_reverse|
+ |0 1.0 1.0 6.0|
+ |1 3.0 3.0 6.0|
+ |2 NaN NaN NaN|
+ |3 2.0 6.0 2.0|
+ └──────────────────────────────────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse)
+ )
+
+ def rolling_sum(
+ self, window_size: int, *, min_samples: int | None = None, center: bool = False
+ ) -> Self:
+ """Apply a rolling sum (moving sum) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their sum.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`
+ center: Set the labels at the center of the window.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_sum|
+ |0 1.0 1.0|
+ |1 2.0 3.0|
+ |2 NaN 3.0|
+ |3 4.0 6.0|
+ └─────────────────────┘
+ """
+ window_size, min_samples_int = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_sum(
+ window_size=window_size, min_samples=min_samples_int, center=center
+ )
+ )
+
+ def rolling_mean(
+ self, window_size: int, *, min_samples: int | None = None, center: bool = False
+ ) -> Self:
+ """Apply a rolling mean (moving mean) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their mean.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`
+ center: Set the labels at the center of the window.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1)
+ ... )
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ | a a_rolling_mean|
+ |0 1.0 1.0|
+ |1 2.0 1.5|
+ |2 NaN 1.5|
+ |3 4.0 3.0|
+ └──────────────────────┘
+ """
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_mean(
+ window_size=window_size, min_samples=min_samples, center=center
+ )
+ )
+
+ def rolling_var(
+ self,
+ window_size: int,
+ *,
+ min_samples: int | None = None,
+ center: bool = False,
+ ddof: int = 1,
+ ) -> Self:
+ """Apply a rolling variance (moving variance) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their variance.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`.
+ center: Set the labels at the center of the window.
+ ddof: Delta Degrees of Freedom; the divisor for a length N window is N - ddof.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_var|
+ |0 1.0 NaN|
+ |1 2.0 0.5|
+ |2 NaN 0.5|
+ |3 4.0 2.0|
+ └─────────────────────┘
+ """
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_var(
+ window_size=window_size, min_samples=min_samples, center=center, ddof=ddof
+ )
+ )
+
+ def rolling_std(
+ self,
+ window_size: int,
+ *,
+ min_samples: int | None = None,
+ center: bool = False,
+ ddof: int = 1,
+ ) -> Self:
+ """Apply a rolling standard deviation (moving standard deviation) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their standard deviation.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`.
+ center: Set the labels at the center of the window.
+ ddof: Delta Degrees of Freedom; the divisor for a length N window is N - ddof.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_std|
+ |0 1.0 NaN|
+ |1 2.0 0.707107|
+ |2 NaN 0.707107|
+ |3 4.0 1.414214|
+ └─────────────────────┘
+ """
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_std(
+ window_size=window_size, min_samples=min_samples, center=center, ddof=ddof
+ )
+ )
+
+ def rank(self, method: RankMethod = "average", *, descending: bool = False) -> Self:
+ """Assign ranks to data, dealing with ties appropriately.
+
+ Notes:
+ The resulting dtype may differ between backends.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ method: The method used to assign ranks to tied elements.
+ The following methods are available (default is 'average')
+
+ - *"average"*: The average of the ranks that would have been assigned to
+ all the tied values is assigned to each value.
+ - *"min"*: The minimum of the ranks that would have been assigned to all
+ the tied values is assigned to each value. (This is also referred to
+ as "competition" ranking.)
+ - *"max"*: The maximum of the ranks that would have been assigned to all
+ the tied values is assigned to each value.
+ - *"dense"*: Like "min", but the rank of the next highest element is
+ assigned the rank immediately after those assigned to the tied elements.
+ - *"ordinal"*: All values are given a distinct rank, corresponding to the
+ order that the values occur in the Series.
+
+ descending: Rank in descending order.
+
+ Returns:
+ A new expression with rank data.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(rank=nw.col("a").rank(method="dense"))
+ >>> result
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a rank |
+ | 0 3 2.0 |
+ | 1 6 3.0 |
+ | 2 1 1.0 |
+ | 3 1 1.0 |
+ | 4 6 3.0 |
+ └──────────────────┘
+ """
+ supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
+ if method not in supported_rank_methods:
+ msg = (
+ "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
+ f"Found '{method}'"
+ )
+ raise ValueError(msg)
+
+ return self._with_unorderable_window(
+ lambda plx: self._to_compliant_expr(plx).rank(
+ method=method, descending=descending
+ )
+ )
+
+ def log(self, base: float = math.e) -> Self:
+ r"""Compute the logarithm to a given base.
+
+ Arguments:
+ base: Given base, defaults to `e`
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"values": [1, 2, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(
+ ... log=nw.col("values").log(), log_2=nw.col("values").log(base=2)
+ ... )
+ >>> result
+ ┌────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------------------|
+ |pyarrow.Table |
+ |values: int64 |
+ |log: double |
+ |log_2: double |
+ |---- |
+ |values: [[1,2,4]] |
+ |log: [[0,0.6931471805599453,1.3862943611198906]]|
+ |log_2: [[0,1,2]] |
+ └────────────────────────────────────────────────┘
+ """
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).log(base=base)
+ )
+
+ def exp(self) -> Self:
+ r"""Compute the exponent.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"values": [-1, 0, 1]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(exp=nw.col("values").exp())
+ >>> result
+ ┌────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------------------|
+ |pyarrow.Table |
+ |values: int64 |
+ |exp: double |
+ |---- |
+ |values: [[-1,0,1]] |
+ |exp: [[0.36787944117144233,1,2.718281828459045]]|
+ └────────────────────────────────────────────────┘
+ """
+ return self._with_elementwise_op(lambda plx: self._to_compliant_expr(plx).exp())
+
+ @property
+ def str(self) -> ExprStringNamespace[Self]:
+ return ExprStringNamespace(self)
+
+ @property
+ def dt(self) -> ExprDateTimeNamespace[Self]:
+ return ExprDateTimeNamespace(self)
+
+ @property
+ def cat(self) -> ExprCatNamespace[Self]:
+ return ExprCatNamespace(self)
+
+ @property
+ def name(self) -> ExprNameNamespace[Self]:
+ return ExprNameNamespace(self)
+
+ @property
+ def list(self) -> ExprListNamespace[Self]:
+ return ExprListNamespace(self)
+
+ @property
+ def struct(self) -> ExprStructNamespace[Self]:
+ return ExprStructNamespace(self)
+
+
+__all__ = ["Expr"]