from __future__ import annotations

from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence, cast

import numpy as np

from narwhals._compliant import EagerSeries
from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace
from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace
from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace
from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace
from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace
from narwhals._pandas_like.utils import (
    align_and_extract_native,
    get_dtype_backend,
    narwhals_to_native_dtype,
    native_to_narwhals_dtype,
    object_native_to_narwhals_dtype,
    rename,
    select_columns_by_name,
    set_index,
)
from narwhals._utils import (
    Implementation,
    is_list_of,
    parse_version,
    validate_backend_version,
)
from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series
from narwhals.exceptions import InvalidOperationError

if TYPE_CHECKING:
    from types import ModuleType
    from typing import Hashable

    import pandas as pd
    import polars as pl
    import pyarrow as pa
    from typing_extensions import Self, TypeIs

    from narwhals._arrow.typing import ChunkedArrayAny
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
    from narwhals._pandas_like.namespace import PandasLikeNamespace
    from narwhals._utils import Version, _FullContext
    from narwhals.dtypes import DType
    from narwhals.typing import (
        ClosedInterval,
        FillNullStrategy,
        Into1DArray,
        IntoDType,
        NonNestedLiteral,
        NumericLiteral,
        RankMethod,
        RollingInterpolationMethod,
        SizedMultiIndexSelector,
        TemporalLiteral,
        _1DArray,
        _AnyDArray,
        _SliceIndex,
    )

PANDAS_TO_NUMPY_DTYPE_NO_MISSING = {
    "Int64": "int64",
    "int64[pyarrow]": "int64",
    "Int32": "int32",
    "int32[pyarrow]": "int32",
    "Int16": "int16",
    "int16[pyarrow]": "int16",
    "Int8": "int8",
    "int8[pyarrow]": "int8",
    "UInt64": "uint64",
    "uint64[pyarrow]": "uint64",
    "UInt32": "uint32",
    "uint32[pyarrow]": "uint32",
    "UInt16": "uint16",
    "uint16[pyarrow]": "uint16",
    "UInt8": "uint8",
    "uint8[pyarrow]": "uint8",
    "Float64": "float64",
    "float64[pyarrow]": "float64",
    "Float32": "float32",
    "float32[pyarrow]": "float32",
}
PANDAS_TO_NUMPY_DTYPE_MISSING = {
    "Int64": "float64",
    "int64[pyarrow]": "float64",
    "Int32": "float64",
    "int32[pyarrow]": "float64",
    "Int16": "float64",
    "int16[pyarrow]": "float64",
    "Int8": "float64",
    "int8[pyarrow]": "float64",
    "UInt64": "float64",
    "uint64[pyarrow]": "float64",
    "UInt32": "float64",
    "uint32[pyarrow]": "float64",
    "UInt16": "float64",
    "uint16[pyarrow]": "float64",
    "UInt8": "float64",
    "uint8[pyarrow]": "float64",
    "Float64": "float64",
    "float64[pyarrow]": "float64",
    "Float32": "float32",
    "float32[pyarrow]": "float32",
}


class PandasLikeSeries(EagerSeries[Any]):
    def __init__(
        self,
        native_series: Any,
        *,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        version: Version,
    ) -> None:
        self._name = native_series.name
        self._native_series = native_series
        self._implementation = implementation
        self._backend_version = backend_version
        self._version = version
        validate_backend_version(self._implementation, self._backend_version)
        # Flag which indicates if, in the final step before applying an operation,
        # the single value behind the PandasLikeSeries should be extracted and treated
        # as a Scalar. For example, in `nw.col('a') - nw.lit(3)`, the latter would
        # become a Series of length 1. Rather than doing a full broadcast so it matches
        # the length of the whole dataframe, we just extract the scalar.
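        # Illustrative sketch of the intent (names as in the comment above): with a
        # 3-row column `a`, `nw.lit(3)` is backed by a length-1 native Series whose
        # `_broadcast` flag is True, so `nw.col('a') - nw.lit(3)` subtracts the
        # extracted scalar 3 rather than a reindexed, full-length Series.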
        self._broadcast = False

    @property
    def native(self) -> Any:
        return self._native_series

    def __native_namespace__(self) -> ModuleType:
        if self._implementation.is_pandas_like():
            return self._implementation.to_native_namespace()
        msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}"  # pragma: no cover
        raise AssertionError(msg)

    def __narwhals_namespace__(self) -> PandasLikeNamespace:
        from narwhals._pandas_like.namespace import PandasLikeNamespace

        return PandasLikeNamespace(
            self._implementation, self._backend_version, self._version
        )

    def _gather(self, rows: SizedMultiIndexSelector[pd.Series[Any]]) -> Self:
        rows = list(rows) if isinstance(rows, tuple) else rows
        return self._with_native(self.native.iloc[rows])

    def _gather_slice(self, rows: _SliceIndex | range) -> Self:
        return self._with_native(
            self.native.iloc[slice(rows.start, rows.stop, rows.step)]
        )

    def _with_version(self, version: Version) -> Self:
        return self.__class__(
            self.native,
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=version,
        )

    def _with_native(self, series: Any, *, preserve_broadcast: bool = False) -> Self:
        result = self.__class__(
            series,
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
        )
        if preserve_broadcast:
            result._broadcast = self._broadcast
        return result

    @classmethod
    def from_iterable(
        cls,
        data: Iterable[Any],
        *,
        context: _FullContext,
        name: str = "",
        dtype: IntoDType | None = None,
        index: Any = None,
    ) -> Self:
        implementation = context._implementation
        backend_version = context._backend_version
        version = context._version
        ns = implementation.to_native_namespace()
        kwds: dict[str, Any] = {}
        if dtype:
            kwds["dtype"] = narwhals_to_native_dtype(
                dtype, None, implementation, backend_version, version
            )
        else:
            if implementation.is_pandas():
                kwds["copy"] = False
        if index is not None and len(index):
            kwds["index"] = index
        return cls.from_native(ns.Series(data, name=name, **kwds), context=context)

    @staticmethod
    def _is_native(obj: Any) -> TypeIs[Any]:
        return is_pandas_like_series(obj)  # pragma: no cover

    @classmethod
    def from_native(cls, data: Any, /, *, context: _FullContext) -> Self:
        return cls(
            data,
            implementation=context._implementation,
            backend_version=context._backend_version,
            version=context._version,
        )

    @classmethod
    def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
        implementation = context._implementation
        arr = data if is_numpy_array_1d(data) else [data]
        native = implementation.to_native_namespace().Series(arr, name="")
        return cls.from_native(native, context=context)

    @property
    def name(self) -> str:
        return self._name

    @property
    def dtype(self) -> DType:
        native_dtype = self.native.dtype
        return (
            native_to_narwhals_dtype(native_dtype, self._version, self._implementation)
            if native_dtype != "object"
            else object_native_to_narwhals_dtype(
                self.native, self._version, self._implementation
            )
        )

    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> PandasLikeSeries:
        ser = self.native
        mask_na = ser.isna()
        if self._implementation is Implementation.CUDF:
            if (min_samples == 0 and not ignore_nulls) or (not mask_na.any()):
                result = ser.ewm(
                    com=com, span=span, halflife=half_life, alpha=alpha, adjust=adjust
                ).mean()
            else:
                msg = (
                    "cuDF only supports `ewm_mean` when there are no missing values "
                    "or when both `min_samples=0` and `ignore_nulls=False`"
                )
                raise NotImplementedError(msg)
        else:
            result = ser.ewm(
                com,
                span,
                half_life,
                alpha,
                min_samples,
                adjust,
                ignore_na=ignore_nulls,
            ).mean()
        result[mask_na] = None
        return self._with_native(result)

    def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
        if isinstance(values, self.__class__):
            values = set_index(
                values.native,
                self.native.index[indices],
                implementation=self._implementation,
                backend_version=self._backend_version,
            )
        s = self.native.copy(deep=True)
        s.iloc[indices] = values
        s.name = self.name
        return self._with_native(s)

    def _scatter_in_place(self, indices: Self, values: Self) -> None:
        # Scatter, modifying original Series. Use with care!
        values_native = set_index(
            values.native,
            self.native.index[indices.native],
            implementation=self._implementation,
            backend_version=self._backend_version,
        )
        if self._implementation is Implementation.PANDAS and parse_version(np) < (2,):
            values_native = values_native.copy()  # pragma: no cover
        min_pd_version = (1, 2)
        if (
            self._implementation is Implementation.PANDAS
            and self._backend_version < min_pd_version
        ):
            self.native.iloc[indices.native.values] = values_native  # noqa: PD011
        else:
            self.native.iloc[indices.native] = values_native

    def cast(self, dtype: IntoDType) -> Self:
        pd_dtype = narwhals_to_native_dtype(
            dtype,
            dtype_backend=get_dtype_backend(self.native.dtype, self._implementation),
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
        )
        return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True)

    def item(self, index: int | None) -> Any:
        # cuDF doesn't have Series.item().
        if index is None:
            if len(self) != 1:
                msg = (
                    "can only call '.item()' if the Series is of length 1,"
                    f" or an explicit index is provided (Series is of length {len(self)})"
                )
                raise ValueError(msg)
            return self.native.iloc[0]
        return self.native.iloc[index]

    def to_frame(self) -> PandasLikeDataFrame:
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        return PandasLikeDataFrame(
            self.native.to_frame(),
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
            validate_column_names=False,
        )

    def to_list(self) -> list[Any]:
        is_cudf = self._implementation.is_cudf()
        return self.native.to_arrow().to_pylist() if is_cudf else self.native.to_list()

    def is_between(
        self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
    ) -> Self:
        ser = self.native
        _, lower_bound = align_and_extract_native(self, lower_bound)
        _, upper_bound = align_and_extract_native(self, upper_bound)
        if closed == "left":
            res = ser.ge(lower_bound) & ser.lt(upper_bound)
        elif closed == "right":
            res = ser.gt(lower_bound) & ser.le(upper_bound)
        elif closed == "none":
            res = ser.gt(lower_bound) & ser.lt(upper_bound)
        elif closed == "both":
            res = ser.ge(lower_bound) & ser.le(upper_bound)
        else:  # pragma: no cover
            raise AssertionError
        return self._with_native(res).alias(ser.name)

    def is_in(self, other: Any) -> PandasLikeSeries:
        return self._with_native(self.native.isin(other))

    def arg_true(self) -> PandasLikeSeries:
        ser = self.native
        result = ser.__class__(range(len(ser)), name=ser.name, index=ser.index).loc[ser]
        return self._with_native(result)

    def arg_min(self) -> int:
        if self._implementation is Implementation.PANDAS and self._backend_version < (
            1,
        ):
            return self.native.to_numpy().argmin()
        return self.native.argmin()

    def arg_max(self) -> int:
        ser = self.native
        if self._implementation is Implementation.PANDAS and self._backend_version < (
            1,
        ):
            return ser.to_numpy().argmax()
        return ser.argmax()

    # Binary comparisons

    def filter(self, predicate: Any) -> PandasLikeSeries:
        if not is_list_of(predicate, bool):
            _, other_native = align_and_extract_native(self, predicate)
        else:
            other_native = predicate
        return self._with_native(self.native.loc[other_native]).alias(self.name)

    def __eq__(self, other: object) -> PandasLikeSeries:  # type: ignore[override]
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser == other).alias(self.name)

    def __ne__(self, other: object) -> PandasLikeSeries:  # type: ignore[override]
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser != other).alias(self.name)

    def __ge__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser >= other).alias(self.name)

    def __gt__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser > other).alias(self.name)

    def __le__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser <= other).alias(self.name)

    def __lt__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser < other).alias(self.name)

    def __and__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser & other).alias(self.name)

    def __rand__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        ser = cast("pd.Series[Any]", ser)
        return self._with_native(ser.__and__(other)).alias(self.name)

    def __or__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser | other).alias(self.name)

    def __ror__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        ser = cast("pd.Series[Any]", ser)
        return self._with_native(ser.__or__(other)).alias(self.name)

    def __add__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser + other).alias(self.name)

    def __radd__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__radd__(other_native)).alias(self.name)

    def __sub__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser - other).alias(self.name)

    def __rsub__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rsub__(other_native)).alias(self.name)

    def __mul__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser * other).alias(self.name)

    def __rmul__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rmul__(other_native)).alias(self.name)

    def __truediv__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser / other).alias(self.name)

    def __rtruediv__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rtruediv__(other_native)).alias(self.name)

    def __floordiv__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser // other).alias(self.name)

    def __rfloordiv__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rfloordiv__(other_native)).alias(self.name)

    def __pow__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser**other).alias(self.name)

    def __rpow__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rpow__(other_native)).alias(self.name)

    def __mod__(self, other: Any) -> PandasLikeSeries:
        ser, other = align_and_extract_native(self, other)
        return self._with_native(ser % other).alias(self.name)

    def __rmod__(self, other: Any) -> PandasLikeSeries:
        _, other_native = align_and_extract_native(self, other)
        return self._with_native(self.native.__rmod__(other_native)).alias(self.name)

    # Unary

    def __invert__(self: PandasLikeSeries) -> PandasLikeSeries:
        return self._with_native(~self.native)

    # Reductions

    def any(self) -> bool:
        return self.native.any()

    def all(self) -> bool:
        return self.native.all()

    def min(self) -> Any:
        return self.native.min()

    def max(self) -> Any:
        return self.native.max()

    def sum(self) -> float:
        return self.native.sum()

    def count(self) -> int:
        return self.native.count()

    def mean(self) -> float:
        return self.native.mean()

    def median(self) -> float:
        if not self.dtype.is_numeric():
            msg = "`median` operation not supported for non-numeric input type."
            raise InvalidOperationError(msg)
        return self.native.median()

    def std(self, *, ddof: int) -> float:
        return self.native.std(ddof=ddof)

    def var(self, *, ddof: int) -> float:
        return self.native.var(ddof=ddof)

    def skew(self) -> float | None:
        ser_not_null = self.native.dropna()
        if len(ser_not_null) == 0:
            return None
        elif len(ser_not_null) == 1:
            return float("nan")
        elif len(ser_not_null) == 2:
            return 0.0
        else:
            # Biased (Fisher-Pearson) sample skewness: m3 / m2**1.5, where m2 and m3
            # are the second and third central moments of the non-null values.
            m = ser_not_null - ser_not_null.mean()
            m2 = (m**2).mean()
            m3 = (m**3).mean()
            return m3 / (m2**1.5) if m2 != 0 else float("nan")

    def len(self) -> int:
        return len(self.native)

    # Transformations

    def is_null(self) -> PandasLikeSeries:
        return self._with_native(self.native.isna(), preserve_broadcast=True)

    def is_nan(self) -> PandasLikeSeries:
        ser = self.native
        if self.dtype.is_numeric():
            return self._with_native(ser != ser, preserve_broadcast=True)  # noqa: PLR0124
        msg = f"`.is_nan` only supported for numeric dtype and not {self.dtype}, did you mean `.is_null`?"
        raise InvalidOperationError(msg)

    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self:
        ser = self.native
        if value is not None:
            _, native_value = align_and_extract_native(self, value)
            res_ser = self._with_native(
                ser.fillna(value=native_value), preserve_broadcast=True
            )
        else:
            res_ser = self._with_native(
                ser.ffill(limit=limit)
                if strategy == "forward"
                else ser.bfill(limit=limit),
                preserve_broadcast=True,
            )
        return res_ser

    def drop_nulls(self) -> PandasLikeSeries:
        return self._with_native(self.native.dropna())

    def n_unique(self) -> int:
        return self.native.nunique(dropna=False)

    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self:
        return self._with_native(
            self.native.sample(
                n=n, frac=fraction, replace=with_replacement, random_state=seed
            )
        )

    def abs(self) -> PandasLikeSeries:
        return self._with_native(self.native.abs())

    def cum_sum(self, *, reverse: bool) -> Self:
        result = (
            self.native.cumsum(skipna=True)
            if not reverse
            else self.native[::-1].cumsum(skipna=True)[::-1]
        )
        return self._with_native(result)

    def unique(self, *, maintain_order: bool = True) -> PandasLikeSeries:
        """Pandas always maintains order, as per its docstring.

        > Uniques are returned in order of appearance.
        """
        return self._with_native(
            self.native.__class__(self.native.unique(), name=self.name)
        )

    def diff(self) -> PandasLikeSeries:
        return self._with_native(self.native.diff())

    def shift(self, n: int) -> PandasLikeSeries:
        return self._with_native(self.native.shift(n))

    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> PandasLikeSeries:
        tmp_name = f"{self.name}_tmp"
        dtype_backend = get_dtype_backend(self.native.dtype, self._implementation)
        dtype = (
            narwhals_to_native_dtype(
                return_dtype,
                dtype_backend,
                self._implementation,
                self._backend_version,
                self._version,
            )
            if return_dtype
            else None
        )
        namespace = self.__native_namespace__()
        other = namespace.DataFrame(
            {self.name: old, tmp_name: namespace.Series(new, dtype=dtype)}
        )
        result = self._with_native(
            self.native.to_frame().merge(other, on=self.name, how="left")[tmp_name]
        ).alias(self.name)
        if result.is_null().sum() != self.is_null().sum():
            msg = (
                "replace_strict did not replace all non-null values.\n\n"
                f"The following did not get replaced: {self.filter(~self.is_null() & result.is_null()).unique(maintain_order=False).to_list()}"
            )
            raise ValueError(msg)
        return result

    def sort(self, *, descending: bool, nulls_last: bool) -> PandasLikeSeries:
        na_position = "last" if nulls_last else "first"
        return self._with_native(
            self.native.sort_values(ascending=not descending, na_position=na_position)
        ).alias(self.name)

    def alias(self, name: str | Hashable) -> Self:
        if name != self.name:
            return self._with_native(
                rename(
                    self.native,
                    name,
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                ),
                preserve_broadcast=True,
            )
        return self

    def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray:
        # pandas used to always return object dtype for nullable dtypes.
        # So, we intercept __array__ and pass to `to_numpy` ourselves to make
        # sure an appropriate numpy dtype is returned.
        return self.to_numpy(dtype=dtype, copy=copy)

    def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
        # the default is meant to be None, but pandas doesn't allow it?
        # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.__array__.html
        dtypes = self._version.dtypes
        if isinstance(self.dtype, dtypes.Datetime) and self.dtype.time_zone is not None:
            s = self.dt.convert_time_zone("UTC").dt.replace_time_zone(None).native
        else:
            s = self.native

        has_missing = s.isna().any()
        kwargs: dict[Any, Any] = {"copy": copy or self._implementation.is_cudf()}
        if has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
            if self._implementation is Implementation.PANDAS and self._backend_version < (
                1,
            ):  # pragma: no cover
                ...
            else:
                kwargs.update({"na_value": float("nan")})
            dtype = dtype or PANDAS_TO_NUMPY_DTYPE_MISSING[str(s.dtype)]
        if not has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_NO_MISSING:
            dtype = dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)]
        return s.to_numpy(dtype=dtype, **kwargs)

    def to_pandas(self) -> pd.Series[Any]:
        if self._implementation is Implementation.PANDAS:
            return self.native
        elif self._implementation is Implementation.CUDF:  # pragma: no cover
            return self.native.to_pandas()
        elif self._implementation is Implementation.MODIN:
            return self.native._to_pandas()
        msg = f"Unknown implementation: {self._implementation}"  # pragma: no cover
        raise AssertionError(msg)

    def to_polars(self) -> pl.Series:
        import polars as pl  # ignore-banned-import

        return pl.from_pandas(self.to_pandas())

    # --- descriptive ---
    def is_unique(self) -> Self:
        return self._with_native(~self.native.duplicated(keep=False)).alias(self.name)

    def null_count(self) -> int:
        return self.native.isna().sum()

    def is_first_distinct(self) -> Self:
        return self._with_native(~self.native.duplicated(keep="first")).alias(self.name)

    def is_last_distinct(self) -> Self:
        return self._with_native(~self.native.duplicated(keep="last")).alias(self.name)

    def is_sorted(self, *, descending: bool) -> bool:
        if not isinstance(descending, bool):
            msg = f"argument 'descending' should be boolean, found {type(descending)}"
            raise TypeError(msg)
        if descending:
            return self.native.is_monotonic_decreasing
        else:
            return self.native.is_monotonic_increasing

    def value_counts(
        self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
    ) -> PandasLikeDataFrame:
        """Parallel is unused, exists for compatibility."""
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        index_name_ = "index" if self._name is None else self._name
        value_name_ = name or ("proportion" if normalize else "count")

        val_count = self.native.value_counts(
            dropna=False, sort=False, normalize=normalize
        ).reset_index()
        val_count.columns = [index_name_, value_name_]
        if sort:
            val_count = val_count.sort_values(value_name_, ascending=False)

        return PandasLikeDataFrame.from_native(val_count, context=self)

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> float:
        return self.native.quantile(q=quantile, interpolation=interpolation)

    def zip_with(self, mask: Any, other: Any) -> PandasLikeSeries:
        ser = self.native
        _, mask = align_and_extract_native(self, mask)
        _, other = align_and_extract_native(self, other)
        res = ser.where(mask, other)
        return self._with_native(res)

    def head(self, n: int) -> Self:
        return self._with_native(self.native.head(n))

    def tail(self, n: int) -> Self:
        return self._with_native(self.native.tail(n))

    def round(self, decimals: int) -> Self:
        return self._with_native(self.native.round(decimals=decimals))

    def to_dummies(self, *, separator: str, drop_first: bool) -> PandasLikeDataFrame:
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        plx = self.__native_namespace__()
        series = self.native
        name = str(self._name) if self._name else ""
        null_col_pl = f"{name}{separator}null"

        has_nulls = series.isna().any()
        result = plx.get_dummies(
            series,
            prefix=name,
            prefix_sep=separator,
            drop_first=drop_first,
            # Adds a null column at the end, depending on whether or not there are any.
            dummy_na=has_nulls,
            dtype="int8",
        )
        if has_nulls:
            *cols, null_col_pd = list(result.columns)
            output_order = [null_col_pd, *cols]
            result = rename(
                select_columns_by_name(
                    result, output_order, self._backend_version, self._implementation
                ),
                columns={null_col_pd: null_col_pl},
                implementation=self._implementation,
                backend_version=self._backend_version,
            )
        return PandasLikeDataFrame.from_native(result, context=self)

    def gather_every(self, n: int, offset: int) -> Self:
        return self._with_native(self.native.iloc[offset::n])

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self:
        _, lower = (
            align_and_extract_native(self, lower_bound) if lower_bound else (None, None)
        )
        _, upper = (
            align_and_extract_native(self, upper_bound) if upper_bound else (None, None)
        )
        kwargs = {"axis": 0} if self._implementation is Implementation.MODIN else {}
        return self._with_native(self.native.clip(lower, upper, **kwargs))

    def to_arrow(self) -> pa.Array[Any]:
        if self._implementation is Implementation.CUDF:
            return self.native.to_arrow()

        import pyarrow as pa  # ignore-banned-import()

        return pa.Array.from_pandas(self.native)

    def mode(self) -> Self:
        result = self.native.mode()
        result.name = self.name
        return self._with_native(result)

    def cum_count(self, *, reverse: bool) -> Self:
        not_na_series = ~self.native.isna()
        result = (
            not_na_series.cumsum()
            if not reverse
            else len(self) - not_na_series.cumsum() + not_na_series - 1
        )
        return self._with_native(result)

    def cum_min(self, *, reverse: bool) -> Self:
        result = (
            self.native.cummin(skipna=True)
            if not reverse
            else self.native[::-1].cummin(skipna=True)[::-1]
        )
        return self._with_native(result)

    def cum_max(self, *, reverse: bool) -> Self:
        result = (
            self.native.cummax(skipna=True)
            if not reverse
            else self.native[::-1].cummax(skipna=True)[::-1]
        )
        return self._with_native(result)

    def cum_prod(self, *, reverse: bool) -> Self:
        result = (
            self.native.cumprod(skipna=True)
            if not reverse
            else self.native[::-1].cumprod(skipna=True)[::-1]
        )
        return self._with_native(result)

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        result = self.native.rolling(
            window=window_size, min_periods=min_samples, center=center
        ).sum()
        return self._with_native(result)

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        result = self.native.rolling(
            window=window_size, min_periods=min_samples, center=center
        ).mean()
        return self._with_native(result)

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        result = self.native.rolling(
            window=window_size, min_periods=min_samples, center=center
        ).var(ddof=ddof)
        return self._with_native(result)

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        result = self.native.rolling(
            window=window_size, min_periods=min_samples, center=center
        ).std(ddof=ddof)
        return self._with_native(result)

    def __iter__(self) -> Iterator[Any]:
        yield from self.native.__iter__()

    def __contains__(self, other: Any) -> bool:
        return (
            self.native.isna().any() if other is None else (self.native == other).any()
        )

    def is_finite(self) -> Self:
        s = self.native
        return self._with_native((s > float("-inf")) & (s < float("inf")))

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        pd_method = "first" if method == "ordinal" else method
        name = self.name
        if (
            self._implementation is Implementation.PANDAS
            and self._backend_version < (3,)
            and self.dtype.is_integer()
            and (null_mask := self.native.isna()).any()
        ):
            # crazy workaround for the case of `na_option="keep"` and nullable
            # integer dtypes. This should be supported in pandas > 3.0
            # https://github.com/pandas-dev/pandas/issues/56976
            ranked_series = (
                self.native.to_frame()
                .assign(**{f"{name}_is_null": null_mask})
                .groupby(f"{name}_is_null")
                .rank(
                    method=pd_method,
                    na_option="keep",
                    ascending=not descending,
                    pct=False,
                )[name]
            )
        else:
            ranked_series = self.native.rank(
                method=pd_method, na_option="keep", ascending=not descending, pct=False
            )
        return self._with_native(ranked_series)

    def hist(  # noqa: C901, PLR0912
        self,
        bins: list[float | int] | None,
        *,
        bin_count: int | None,
        include_breakpoint: bool,
    ) -> PandasLikeDataFrame:
        from numpy import linspace, zeros

        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        ns = self.__native_namespace__()
        data: dict[str, Sequence[int | float | str] | _AnyDArray]

        if bin_count == 0 or (bins is not None and len(bins) <= 1):
            data = {}
            if include_breakpoint:
                data["breakpoint"] = []
            data["count"] = []
            return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
        if self.native.count() < 1:
            if bins is not None:
                data = {"breakpoint": bins[1:], "count": zeros(shape=len(bins) - 1)}
            else:
                count = cast("int", bin_count)
                if bin_count == 1:
                    data = {"breakpoint": [1.0], "count": [0]}
                else:
                    data = {
                        "breakpoint": linspace(0, 1, count + 1)[1:],
                        "count": zeros(shape=count),
                    }
            if not include_breakpoint:
                del data["breakpoint"]
            return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

        if bin_count is not None:
            # use Polars binning behavior
            lower, upper = self.native.min(), self.native.max()
            if lower == upper:
                lower -= 0.5
                upper += 0.5

            if bin_count == 1:
                data = {"breakpoint": [upper], "count": [self.native.count()]}
                if not include_breakpoint:
                    del data["breakpoint"]
                return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

            bins = linspace(lower, upper, bin_count + 1)
            bin_count = None

        # pandas (2.2.*) .value_counts(bins=int) adjusts the lowest bin twice, resulting in improper counts.
        # pandas (2.2.*) .value_counts(bins=[...]) adjusts the lowest bin, which should not happen since
        # the bins were explicitly passed in.
        categories = ns.cut(
            self.native,
            bins=bins if bin_count is None else bin_count,
            include_lowest=True,  # Polars 1.27.0 always includes the lowest bin
        )
        # modin (0.32.0) .value_counts(...) silently drops bins with empty observations;
        # .reindex is necessary to restore these bins.
        result = categories.value_counts(dropna=True, sort=False).reindex(
            categories.cat.categories, fill_value=0
        )
        data = {}
        if include_breakpoint:
            data["breakpoint"] = bins[1:] if bins is not None else result.index.right
        data["count"] = result.reset_index(drop=True)
        return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

    def log(self, base: float) -> Self:
        native = self.native
        implementation = self._implementation
        dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)

        if implementation.is_cudf():
            import cupy as cp  # ignore-banned-import # cuDF dependency.
            native = self.native
            # Change of base: log_b(x) = ln(x) / ln(b).
            log_arr = cp.log(native) / cp.log(base)
            result_native = type(native)(log_arr, index=native.index, name=native.name)
            return self._with_native(result_native)

        if dtype_backend == "pyarrow":
            import pyarrow.compute as pc

            from narwhals._arrow.utils import native_to_narwhals_dtype

            ca = native.array._pa_array
            result_arr = cast("ChunkedArrayAny", pc.logb(ca, base))
            nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
            out_dtype = narwhals_to_native_dtype(
                nw_dtype,
                "pyarrow",
                self._implementation,
                self._backend_version,
                self._version,
            )
            result_native = native.__class__(
                result_arr, dtype=out_dtype, index=native.index, name=native.name
            )
        else:
            result_native = np.log(native) / np.log(base)
        return self._with_native(result_native)

    def exp(self) -> Self:
        native = self.native
        implementation = self._implementation
        dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)

        if implementation.is_cudf():
            import cupy as cp  # ignore-banned-import # cuDF dependency.

            native = self.native
            exp_arr = cp.exp(native)
            result_native = type(native)(exp_arr, index=native.index, name=native.name)
            return self._with_native(result_native)

        if dtype_backend == "pyarrow":
            import pyarrow.compute as pc

            from narwhals._arrow.utils import native_to_narwhals_dtype

            ca = native.array._pa_array
            result_arr = cast("ChunkedArrayAny", pc.exp(ca))
            nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
            out_dtype = narwhals_to_native_dtype(
                nw_dtype,
                "pyarrow",
                self._implementation,
                self._backend_version,
                self._version,
            )
            result_native = native.__class__(
                result_arr, dtype=out_dtype, index=native.index, name=native.name
            )
        else:
            result_native = np.exp(native)
        return self._with_native(result_native)

    @property
    def str(self) -> PandasLikeSeriesStringNamespace:
        return PandasLikeSeriesStringNamespace(self)

    @property
    def dt(self) -> PandasLikeSeriesDateTimeNamespace:
        return PandasLikeSeriesDateTimeNamespace(self)

    @property
    def cat(self) -> PandasLikeSeriesCatNamespace:
        return PandasLikeSeriesCatNamespace(self)

    @property
    def list(self) -> PandasLikeSeriesListNamespace:
        if not hasattr(self.native, "list"):
            msg = "Series must be of PyArrow List type to support list namespace."
            raise TypeError(msg)
        return PandasLikeSeriesListNamespace(self)

    @property
    def struct(self) -> PandasLikeSeriesStructNamespace:
        if not hasattr(self.native, "struct"):
            msg = "Series must be of PyArrow Struct type to support struct namespace."
            raise TypeError(msg)
        return PandasLikeSeriesStructNamespace(self)
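

# Minimal usage sketch (illustrative only, kept as a comment so it never runs on
# import). It assumes pandas is installed and that `Version.MAIN` is a valid member
# of the `Version` enum referenced above; the expected outputs are indicative.
#
#     import pandas as pd
#     from narwhals._utils import Implementation, Version, parse_version
#
#     s = PandasLikeSeries(
#         pd.Series([1.0, 2.0, None], name="a"),
#         implementation=Implementation.PANDAS,
#         backend_version=parse_version(pd),
#         version=Version.MAIN,
#     )
#     s.null_count()                                          # -> 1
#     s.fill_null(0.0, strategy=None, limit=None).to_list()   # -> [1.0, 2.0, 0.0]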