author    sotech117 <michael_foiani@brown.edu>  2025-07-31 17:27:24 -0400
committer sotech117 <michael_foiani@brown.edu>  2025-07-31 17:27:24 -0400
commit    5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree      8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/narwhals/_pandas_like
parent    b832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/narwhals/_pandas_like')
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py         0
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py     1148
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py           402
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py       293
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py      332
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py       34
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py        1109
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py      17
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py      237
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py     33
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py      79
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py   16
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py          15
-rw-r--r--  venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py          673
14 files changed, 4388 insertions(+), 0 deletions(-)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py
new file mode 100644
index 0000000..bf5287f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py
@@ -0,0 +1,1148 @@
+from __future__ import annotations
+
+from itertools import chain, product
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Iterable,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import numpy as np
+
+from narwhals._compliant import EagerDataFrame
+from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING, PandasLikeSeries
+from narwhals._pandas_like.utils import (
+ align_and_extract_native,
+ align_series_full_broadcast,
+ check_column_names_are_unique,
+ get_dtype_backend,
+ native_to_narwhals_dtype,
+ object_native_to_narwhals_dtype,
+ rename,
+ select_columns_by_name,
+ set_index,
+)
+from narwhals._utils import (
+ Implementation,
+ _into_arrow_table,
+ _remap_full_join_keys,
+ exclude_column_names,
+ generate_temporary_column_name,
+ parse_columns_to_drop,
+ parse_version,
+ scale_bytes,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_pandas_like_dataframe
+from narwhals.exceptions import InvalidOperationError, ShapeError
+
+if TYPE_CHECKING:
+ from io import BytesIO
+ from pathlib import Path
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny
+ from narwhals._pandas_like.expr import PandasLikeExpr
+ from narwhals._pandas_like.group_by import PandasLikeGroupBy
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._translate import IntoArrowTable
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ AsofJoinStrategy,
+ DTypeBackend,
+ JoinStrategy,
+ PivotAgg,
+ SizedMultiIndexSelector,
+ SizedMultiNameSelector,
+ SizeUnit,
+ UniqueKeepStrategy,
+ _2DArray,
+ _SliceIndex,
+ _SliceName,
+ )
+
+ Constructor: TypeAlias = Callable[..., pd.DataFrame]
+
+
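+# Plain NumPy dtypes with no extension-array backing: when every column's
+# dtype is in this set, `to_numpy` below can skip per-column conversion.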
+CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset(
+ [
+ np.dtype("float64"),
+ np.dtype("float32"),
+ np.dtype("int64"),
+ np.dtype("int32"),
+ np.dtype("int16"),
+ np.dtype("int8"),
+ np.dtype("uint64"),
+ np.dtype("uint32"),
+ np.dtype("uint16"),
+ np.dtype("uint8"),
+ np.dtype("bool"),
+ np.dtype("datetime64[s]"),
+ np.dtype("datetime64[ms]"),
+ np.dtype("datetime64[us]"),
+ np.dtype("datetime64[ns]"),
+ np.dtype("timedelta64[s]"),
+ np.dtype("timedelta64[ms]"),
+ np.dtype("timedelta64[us]"),
+ np.dtype("timedelta64[ns]"),
+ np.dtype("object"),
+ ]
+)
+
+
+class PandasLikeDataFrame(EagerDataFrame["PandasLikeSeries", "PandasLikeExpr", "Any"]):
+ def __init__(
+ self,
+ native_dataframe: Any,
+ *,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+ validate_column_names: bool,
+ ) -> None:
+ self._native_frame = native_dataframe
+ self._implementation = implementation
+ self._backend_version = backend_version
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+ if validate_column_names:
+ check_column_names_are_unique(native_dataframe.columns)
+
+ @classmethod
+ def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self:
+ implementation = context._implementation
+ tbl = _into_arrow_table(data, context)
+ if implementation.is_pandas():
+ native = tbl.to_pandas()
+ elif implementation.is_modin(): # pragma: no cover
+ from modin.pandas.utils import (
+ from_arrow as mpd_from_arrow, # pyright: ignore[reportAttributeAccessIssue]
+ )
+
+ native = mpd_from_arrow(tbl)
+ elif implementation.is_cudf(): # pragma: no cover
+ native = implementation.to_native_namespace().DataFrame.from_arrow(tbl)
+ else: # pragma: no cover
+ msg = "congratulations, you entered unreachable code - please report a bug"
+ raise AssertionError(msg)
+ return cls.from_native(native, context=context)
+
+ @classmethod
+ def from_dict(
+ cls,
+ data: Mapping[str, Any],
+ /,
+ *,
+ context: _FullContext,
+ schema: Mapping[str, DType] | Schema | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ implementation = context._implementation
+ ns = implementation.to_native_namespace()
+ Series = cast("type[pd.Series[Any]]", ns.Series) # noqa: N806
+ DataFrame = cast("type[pd.DataFrame]", ns.DataFrame) # noqa: N806
+ aligned_data: dict[str, pd.Series[Any] | Any] = {}
+ left_most: PandasLikeSeries | None = None
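+        # Align each native Series to the index of the left-most one, so the
+        # constructed frame does not silently reindex on mismatched indices.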
+ for name, series in data.items():
+ if isinstance(series, Series):
+ compliant = PandasLikeSeries.from_native(series, context=context)
+ if left_most is None:
+ left_most = compliant
+ aligned_data[name] = series
+ else:
+ aligned_data[name] = align_and_extract_native(left_most, compliant)[1]
+ else:
+ aligned_data[name] = series
+
+ native = DataFrame.from_dict(aligned_data)
+ if schema:
+ it: Iterable[DTypeBackend] = (
+ get_dtype_backend(dtype, implementation) for dtype in native.dtypes
+ )
+ native = native.astype(Schema(schema).to_pandas(it))
+ return cls.from_native(native, context=context)
+
+ @staticmethod
+ def _is_native(obj: Any) -> TypeIs[Any]:
+ return is_pandas_like_dataframe(obj) # pragma: no cover
+
+ @classmethod
+ def from_native(cls, data: Any, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ validate_column_names=True,
+ )
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: _2DArray,
+ /,
+ *,
+ context: _FullContext,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ implementation = context._implementation
+ DataFrame: Constructor = implementation.to_native_namespace().DataFrame # noqa: N806
+ if isinstance(schema, (Mapping, Schema)):
+ it: Iterable[DTypeBackend] = (
+ get_dtype_backend(native_type, implementation)
+ for native_type in schema.values()
+ )
+ native = DataFrame(data, columns=schema.keys()).astype(
+ Schema(schema).to_pandas(it)
+ )
+ else:
+ native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
+ return cls.from_native(native, context=context)
+
+ def __narwhals_dataframe__(self) -> Self:
+ return self
+
+ def __narwhals_lazyframe__(self) -> Self:
+ return self
+
+ def __narwhals_namespace__(self) -> PandasLikeNamespace:
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+
+ return PandasLikeNamespace(
+ self._implementation, self._backend_version, version=self._version
+ )
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ }:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __len__(self) -> int:
+ return len(self.native)
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=version,
+ validate_column_names=False,
+ )
+
+ def _with_native(self, df: Any, *, validate_column_names: bool = True) -> Self:
+ return self.__class__(
+ df,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=validate_column_names,
+ )
+
+ def _extract_comparand(self, other: PandasLikeSeries) -> pd.Series[Any]:
+ index = self.native.index
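+        # A broadcast comparand carries a single scalar: replicating it across
+        # this frame's index lets pandas align it row-wise rather than by label.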
+ if other._broadcast:
+ s = other.native
+ return type(s)(s.iloc[0], index=index, dtype=s.dtype, name=s.name)
+ if (len_other := len(other)) != (len_idx := len(index)):
+ msg = f"Expected object of length {len_idx}, got: {len_other}."
+ raise ShapeError(msg)
+ if other.native.index is not index:
+ return set_index(
+ other.native,
+ index,
+ implementation=other._implementation,
+ backend_version=other._backend_version,
+ )
+ return other.native
+
+ def get_column(self, name: str) -> PandasLikeSeries:
+ return PandasLikeSeries.from_native(self.native[name], context=self)
+
+ def __array__(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
+ return self.to_numpy(dtype=dtype, copy=copy)
+
+ def _gather(self, rows: SizedMultiIndexSelector[pd.Series[Any]]) -> Self:
+ items = list(rows) if isinstance(rows, tuple) else rows
+ return self._with_native(self.native.iloc[items, :])
+
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[slice(rows.start, rows.stop, rows.step), :],
+ validate_column_names=False,
+ )
+
+ def _select_slice_name(self, columns: _SliceName) -> Self:
+ start = (
+ self.native.columns.get_loc(columns.start)
+ if columns.start is not None
+ else None
+ )
+ stop = (
+ self.native.columns.get_loc(columns.stop) + 1
+ if columns.stop is not None
+ else None
+ )
+ selector = slice(start, stop, columns.step)
+ return self._with_native(
+ self.native.iloc[:, selector], validate_column_names=False
+ )
+
+ def _select_slice_index(self, columns: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[:, columns], validate_column_names=False
+ )
+
+ def _select_multi_index(
+ self, columns: SizedMultiIndexSelector[pd.Series[Any]]
+ ) -> Self:
+ columns = list(columns) if isinstance(columns, tuple) else columns
+ return self._with_native(
+ self.native.iloc[:, columns], validate_column_names=False
+ )
+
+ def _select_multi_name(
+ self, columns: SizedMultiNameSelector[pd.Series[Any]]
+ ) -> PandasLikeDataFrame:
+ return self._with_native(self.native.loc[:, columns])
+
+ # --- properties ---
+ @property
+ def columns(self) -> list[str]:
+ return self.native.columns.tolist()
+
+ @overload
+ def rows(self, *, named: Literal[True]) -> list[dict[str, Any]]: ...
+
+ @overload
+ def rows(self, *, named: Literal[False]) -> list[tuple[Any, ...]]: ...
+
+ @overload
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ...
+
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
+ if not named:
+ # cuDF does not support itertuples. But it does support to_dict!
+ if self._implementation is Implementation.CUDF:
+ # Extract the row values from the named rows
+ return [tuple(row.values()) for row in self.rows(named=True)]
+
+ return list(self.native.itertuples(index=False, name=None))
+
+ return self.native.to_dict(orient="records")
+
+ def iter_columns(self) -> Iterator[PandasLikeSeries]:
+ for _name, series in self.native.items(): # noqa: PERF102
+ yield PandasLikeSeries.from_native(series, context=self)
+
+ _iter_columns = iter_columns
+
+ def iter_rows(
+ self, *, named: bool, buffer_size: int
+ ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
+ # The param ``buffer_size`` is only here for compatibility with the Polars API
+ # and has no effect on the output.
+ if not named:
+ yield from self.native.itertuples(index=False, name=None)
+ else:
+ col_names = self.native.columns
+ for row in self.native.itertuples(index=False):
+ yield dict(zip(col_names, row))
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ native_dtypes = self.native.dtypes
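+        # `object` dtype is ambiguous (strings, bytes, arbitrary Python
+        # objects), so those columns are inspected value-wise by a separate helper.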
+ return {
+ col: native_to_narwhals_dtype(
+ native_dtypes[col], self._version, self._implementation
+ )
+ if native_dtypes[col] != "object"
+ else object_native_to_narwhals_dtype(
+ self.native[col], self._version, self._implementation
+ )
+ for col in self.native.columns
+ }
+
+ def collect_schema(self) -> dict[str, DType]:
+ return self.schema
+
+ # --- reshape ---
+ def simple_select(self, *column_names: str) -> Self:
+ return self._with_native(
+ select_columns_by_name(
+ self.native,
+ list(column_names),
+ self._backend_version,
+ self._implementation,
+ ),
+ validate_column_names=False,
+ )
+
+ def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataFrame:
+ new_series = self._evaluate_into_exprs(*exprs)
+ if not new_series:
+ # return empty dataframe, like Polars does
+ return self._with_native(self.native.__class__(), validate_column_names=False)
+ new_series = align_series_full_broadcast(*new_series)
+ namespace = self.__narwhals_namespace__()
+ df = namespace._concat_horizontal([s.native for s in new_series])
+        # `concat` creates a new object, so it's fine to modify `.columns.name` in place.
+ df.columns.name = self.native.columns.name
+ return self._with_native(df, validate_column_names=True)
+
+ def drop_nulls(
+ self: PandasLikeDataFrame, subset: Sequence[str] | None
+ ) -> PandasLikeDataFrame:
+ if subset is None:
+ return self._with_native(
+ self.native.dropna(axis=0), validate_column_names=False
+ )
+ plx = self.__narwhals_namespace__()
+ return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))
+
+ def estimated_size(self, unit: SizeUnit) -> int | float:
+ sz = self.native.memory_usage(deep=True).sum()
+ return scale_bytes(sz, unit=unit)
+
+ def with_row_index(self, name: str) -> Self:
+ frame = self.native
+ namespace = self.__narwhals_namespace__()
+ row_index = namespace._series.from_iterable(
+ range(len(frame)), context=self, index=frame.index
+ ).alias(name)
+ return self._with_native(namespace._concat_horizontal([row_index.native, frame]))
+
+ def row(self, index: int) -> tuple[Any, ...]:
+ return tuple(x for x in self.native.iloc[index])
+
+ def filter(
+ self: PandasLikeDataFrame, predicate: PandasLikeExpr | list[bool]
+ ) -> PandasLikeDataFrame:
+ if isinstance(predicate, list):
+ mask_native: pd.Series[Any] | list[bool] = predicate
+ else:
+ # `[0]` is safe as the predicate's expression only returns a single column
+ mask = self._evaluate_into_exprs(predicate)[0]
+ mask_native = self._extract_comparand(mask)
+ return self._with_native(
+ self.native.loc[mask_native], validate_column_names=False
+ )
+
+ def with_columns(
+ self: PandasLikeDataFrame, *exprs: PandasLikeExpr
+ ) -> PandasLikeDataFrame:
+ columns = self._evaluate_into_exprs(*exprs)
+ if not columns and len(self) == 0:
+ return self
+ name_columns: dict[str, PandasLikeSeries] = {s.name: s for s in columns}
+ to_concat = []
+ # Make sure to preserve column order
+ for name in self.native.columns:
+ if name in name_columns:
+ series = self._extract_comparand(name_columns.pop(name))
+ else:
+ series = self.native[name]
+ to_concat.append(series)
+ to_concat.extend(self._extract_comparand(s) for s in name_columns.values())
+ namespace = self.__narwhals_namespace__()
+ df = namespace._concat_horizontal(to_concat)
+        # `concat` creates a new object, so it's fine to modify `.columns.name` in place.
+ df.columns.name = self.native.columns.name
+ return self._with_native(df, validate_column_names=False)
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ return self._with_native(
+ rename(
+ self.native,
+ columns=mapping,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ )
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ to_drop = parse_columns_to_drop(self, columns, strict=strict)
+ return self._with_native(
+ self.native.drop(columns=to_drop), validate_column_names=False
+ )
+
+ # --- transform ---
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ df = self.native
+ if isinstance(descending, bool):
+ ascending: bool | list[bool] = not descending
+ else:
+ ascending = [not d for d in descending]
+ na_position = "last" if nulls_last else "first"
+ return self._with_native(
+ df.sort_values(list(by), ascending=ascending, na_position=na_position),
+ validate_column_names=False,
+ )
+
+ # --- convert ---
+ def collect(
+ self, backend: Implementation | None, **kwargs: Any
+ ) -> CompliantDataFrameAny:
+ if backend is None:
+ return PandasLikeDataFrame(
+ self.native,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ if backend is Implementation.PANDAS:
+ import pandas as pd # ignore-banned-import
+
+ return PandasLikeDataFrame(
+ self.to_pandas(),
+ implementation=Implementation.PANDAS,
+ backend_version=parse_version(pd),
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ if backend is Implementation.PYARROW:
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+
+ return ArrowDataFrame(
+ native_dataframe=self.to_arrow(),
+ backend_version=parse_version(pa),
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ if backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame(
+ df=self.to_polars(),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+
+ msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
+ raise ValueError(msg) # pragma: no cover
+
+ # --- actions ---
+ def group_by(
+ self, keys: Sequence[str] | Sequence[PandasLikeExpr], *, drop_null_keys: bool
+ ) -> PandasLikeGroupBy:
+ from narwhals._pandas_like.group_by import PandasLikeGroupBy
+
+ return PandasLikeGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def join( # noqa: C901, PLR0911, PLR0912
+ self,
+ other: Self,
+ *,
+ how: JoinStrategy,
+ left_on: Sequence[str] | None,
+ right_on: Sequence[str] | None,
+ suffix: str,
+ ) -> Self:
+ if how == "cross":
+ if (
+ self._implementation is Implementation.MODIN
+ or self._implementation is Implementation.CUDF
+ ) or (
+ self._implementation is Implementation.PANDAS
+ and self._backend_version < (1, 4)
+ ):
+ key_token = generate_temporary_column_name(
+ n_bytes=8, columns=[*self.columns, *other.columns]
+ )
+
+ return self._with_native(
+ self.native.assign(**{key_token: 0})
+ .merge(
+ other.native.assign(**{key_token: 0}),
+ how="inner",
+ left_on=key_token,
+ right_on=key_token,
+ suffixes=("", suffix),
+ )
+ .drop(columns=key_token)
+ )
+ else:
+ return self._with_native(
+ self.native.merge(other.native, how="cross", suffixes=("", suffix))
+ )
+
+ if how == "anti":
+ if self._implementation is Implementation.CUDF:
+ return self._with_native(
+ self.native.merge(
+ other.native, how="leftanti", left_on=left_on, right_on=right_on
+ )
+ )
+ else:
+ indicator_token = generate_temporary_column_name(
+ n_bytes=8, columns=[*self.columns, *other.columns]
+ )
+ if right_on is None: # pragma: no cover
+ msg = "`right_on` cannot be `None` in anti-join"
+ raise TypeError(msg)
+
+ # rename to avoid creating extra columns in join
+ other_native = rename(
+ select_columns_by_name(
+ other.native,
+ list(right_on),
+ self._backend_version,
+ self._implementation,
+ ),
+ columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ ).drop_duplicates()
+ return self._with_native(
+ self.native.merge(
+ other_native,
+ how="outer",
+ indicator=indicator_token,
+ left_on=left_on,
+ right_on=left_on,
+ )
+ .loc[lambda t: t[indicator_token] == "left_only"]
+ .drop(columns=indicator_token)
+ )
+
+ if how == "semi":
+ if right_on is None: # pragma: no cover
+ msg = "`right_on` cannot be `None` in semi-join"
+ raise TypeError(msg)
+ # rename to avoid creating extra columns in join
+ other_native = (
+ rename(
+ select_columns_by_name(
+ other.native,
+ list(right_on),
+ self._backend_version,
+ self._implementation,
+ ),
+ columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ ).drop_duplicates() # avoids potential rows duplication from inner join
+ )
+ return self._with_native(
+ self.native.merge(
+ other_native, how="inner", left_on=left_on, right_on=left_on
+ )
+ )
+
+ if how == "left":
+ result_native = self.native.merge(
+ other.native,
+ how="left",
+ left_on=left_on,
+ right_on=right_on,
+ suffixes=("", suffix),
+ )
+ extra = []
+ for left_key, right_key in zip(left_on, right_on): # type: ignore[arg-type]
+ if right_key != left_key and right_key not in self.columns:
+ extra.append(right_key)
+ elif right_key != left_key:
+ extra.append(f"{right_key}{suffix}")
+ return self._with_native(result_native.drop(columns=extra))
+
+ if how == "full":
+ # Pandas coalesces keys in full joins unless there's no collision
+
+ # help mypy
+ assert left_on is not None # noqa: S101
+ assert right_on is not None # noqa: S101
+
+ right_on_mapper = _remap_full_join_keys(left_on, right_on, suffix)
+ other_native = other.native.rename(columns=right_on_mapper)
+ check_column_names_are_unique(other_native.columns)
+ right_on = list(right_on_mapper.values()) # we now have the suffixed keys
+ return self._with_native(
+ self.native.merge(
+ other_native,
+ left_on=left_on,
+ right_on=right_on,
+ how="outer",
+ suffixes=("", suffix),
+ )
+ )
+
+ return self._with_native(
+ self.native.merge(
+ other.native,
+ left_on=left_on,
+ right_on=right_on,
+ how=how,
+ suffixes=("", suffix),
+ )
+ )
+
+ def join_asof(
+ self,
+ other: Self,
+ *,
+ left_on: str,
+ right_on: str,
+ by_left: Sequence[str] | None,
+ by_right: Sequence[str] | None,
+ strategy: AsofJoinStrategy,
+ suffix: str,
+ ) -> Self:
+ plx = self.__native_namespace__()
+ return self._with_native(
+ plx.merge_asof(
+ self.native,
+ other.native,
+ left_on=left_on,
+ right_on=right_on,
+ left_by=by_left,
+ right_by=by_right,
+ direction=strategy,
+ suffixes=("", suffix),
+ )
+ )
+
+ # --- partial reduction ---
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n), validate_column_names=False)
+
+ def tail(self, n: int) -> Self:
+ return self._with_native(self.native.tail(n), validate_column_names=False)
+
+ def unique(
+ self,
+ subset: Sequence[str] | None,
+ *,
+ keep: UniqueKeepStrategy,
+ maintain_order: bool | None = None,
+ ) -> Self:
+ # The param `maintain_order` is only here for compatibility with the Polars API
+ # and has no effect on the output.
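+        # Polars' keep="none" drops every duplicated row (pandas keep=False),
+        # while keep="any" maps to pandas' keep="first".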
+ mapped_keep = {"none": False, "any": "first"}.get(keep, keep)
+ if subset and (error := self._check_columns_exist(subset)):
+ raise error
+ return self._with_native(
+ self.native.drop_duplicates(subset=subset, keep=mapped_keep),
+ validate_column_names=False,
+ )
+
+ # --- lazy-only ---
+ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAny:
+        if backend is None:
+            return self
+
+        pandas_df = self.to_pandas()
+        if backend is Implementation.DUCKDB:
+ import duckdb # ignore-banned-import
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+
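+            # DuckDB's replacement scan resolves the string "pandas_df" to the
+            # local DataFrame of the same name defined above.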
+ return DuckDBLazyFrame(
+ df=duckdb.table("pandas_df"),
+ backend_version=parse_version(duckdb),
+ version=self._version,
+ )
+ elif backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsLazyFrame
+
+ return PolarsLazyFrame(
+ df=pl.from_pandas(pandas_df).lazy(),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+ elif backend is Implementation.DASK:
+ import dask # ignore-banned-import
+ import dask.dataframe as dd # ignore-banned-import
+
+ from narwhals._dask.dataframe import DaskLazyFrame
+
+ return DaskLazyFrame(
+ native_dataframe=dd.from_pandas(pandas_df),
+ backend_version=parse_version(dask),
+ version=self._version,
+ )
+ raise AssertionError # pragma: no cover
+
+ @property
+ def shape(self) -> tuple[int, int]:
+ return self.native.shape
+
+ def to_dict(self, *, as_series: bool) -> dict[str, Any]:
+ if as_series:
+ return {
+ col: PandasLikeSeries.from_native(self.native[col], context=self)
+ for col in self.columns
+ }
+ return self.native.to_dict(orient="list")
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
+ native_dtypes = self.native.dtypes
+
+ if copy is None:
+ # pandas default differs from Polars, but cuDF default is True
+ copy = self._implementation is Implementation.CUDF
+
+ if native_dtypes.isin(CLASSICAL_NUMPY_DTYPES).all():
+ # Fast path, no conversions necessary.
+ if dtype is not None:
+ return self.native.to_numpy(dtype=dtype, copy=copy)
+ return self.native.to_numpy(copy=copy)
+
+ dtype_datetime = self._version.dtypes.Datetime
+ to_convert = [
+ key
+ for key, val in self.schema.items()
+ if isinstance(val, dtype_datetime) and val.time_zone is not None
+ ]
+ if to_convert:
+ df = self.with_columns(
+ self.__narwhals_namespace__()
+ .col(*to_convert)
+ .dt.convert_time_zone("UTC")
+ .dt.replace_time_zone(None)
+ ).native
+ else:
+ df = self.native
+
+ if dtype is not None:
+ return df.to_numpy(dtype=dtype, copy=copy)
+
+ # pandas return `object` dtype for nullable dtypes if dtype=None,
+ # so we cast each Series to numpy and let numpy find a common dtype.
+ # If there aren't any dtypes where `to_numpy()` is "broken" (i.e. it
+ # returns Object) then we just call `to_numpy()` on the DataFrame.
+ for col_dtype in native_dtypes:
+ if str(col_dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
+ arr: Any = np.hstack(
+ [
+ self.get_column(col).to_numpy(copy=copy, dtype=None)[:, None]
+ for col in self.columns
+ ]
+ )
+ return arr
+ return df.to_numpy(copy=copy)
+
+ def to_pandas(self) -> pd.DataFrame:
+ if self._implementation is Implementation.PANDAS:
+ return self.native
+ elif self._implementation is Implementation.CUDF:
+ return self.native.to_pandas()
+ elif self._implementation is Implementation.MODIN:
+ return self.native._to_pandas()
+ msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def to_polars(self) -> pl.DataFrame:
+ import polars as pl # ignore-banned-import
+
+ return pl.from_pandas(self.to_pandas())
+
+ def write_parquet(self, file: str | Path | BytesIO) -> None:
+ self.native.to_parquet(file)
+
+ @overload
+ def write_csv(self, file: None) -> str: ...
+
+ @overload
+ def write_csv(self, file: str | Path | BytesIO) -> None: ...
+
+ def write_csv(self, file: str | Path | BytesIO | None) -> str | None:
+ return self.native.to_csv(file, index=False)
+
+ # --- descriptive ---
+ def is_unique(self) -> PandasLikeSeries:
+ return PandasLikeSeries.from_native(
+ ~self.native.duplicated(keep=False), context=self
+ )
+
+ def item(self, row: int | None, column: int | str | None) -> Any:
+ if row is None and column is None:
+ if self.shape != (1, 1):
+ msg = (
+ "can only call `.item()` if the dataframe is of shape (1, 1),"
+ " or if explicit row/col values are provided;"
+ f" frame has shape {self.shape!r}"
+ )
+ raise ValueError(msg)
+ return self.native.iloc[0, 0]
+
+ elif row is None or column is None:
+ msg = "cannot call `.item()` with only one of `row` or `column`"
+ raise ValueError(msg)
+
+ _col = self.columns.index(column) if isinstance(column, str) else column
+ return self.native.iloc[row, _col]
+
+ def clone(self) -> Self:
+ return self._with_native(self.native.copy(), validate_column_names=False)
+
+ def gather_every(self, n: int, offset: int) -> Self:
+ return self._with_native(self.native.iloc[offset::n], validate_column_names=False)
+
+ def _pivot_into_index_values(
+ self,
+ on: Sequence[str],
+ index: Sequence[str] | None,
+ values: Sequence[str] | None,
+ /,
+ ) -> tuple[Sequence[str], Sequence[str]]:
+ index = index or (
+ exclude_column_names(self, {*on, *values})
+ if values
+ else exclude_column_names(self, on)
+ )
+ values = values or exclude_column_names(self, {*on, *index})
+ return index, values
+
+ @staticmethod
+ def _pivot_multi_on_name(unique_values: tuple[str, ...], /) -> str:
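+        # e.g. ("a", "b") -> '{"a","b"}', matching Polars' column naming for
+        # pivots with multiple `on` columns.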
+ LB, RB, Q = "{", "}", '"' # noqa: N806
+ body = '","'.join(unique_values)
+ return f"{LB}{Q}{body}{Q}{RB}"
+
+ @staticmethod
+ def _pivot_single_on_names(
+ column_names: Iterable[str], n_values: int, separator: str, /
+ ) -> list[str]:
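+        # pandas yields MultiIndex tuples like ("val", "a1"); with several
+        # `values` these join as e.g. "val_a1" (given separator "_"), otherwise
+        # only the last level (the `on` value) is kept.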
+ if n_values > 1:
+ return [separator.join(col).strip() for col in column_names]
+ return [col[-1] for col in column_names]
+
+ def _pivot_multi_on_names(
+ self,
+ column_names: Iterable[tuple[str, ...]],
+ n_on: int,
+ n_values: int,
+ separator: str,
+ /,
+ ) -> Iterator[str]:
+ if n_values > 1:
+ for col in column_names:
+ names = col[-n_on:]
+ prefix = col[0]
+ yield separator.join((prefix, self._pivot_multi_on_name(names)))
+ else:
+ for col in column_names:
+ yield self._pivot_multi_on_name(col[-n_on:])
+
+ def _pivot_remap_column_names(
+ self, column_names: Iterable[Any], *, n_on: int, n_values: int, separator: str
+ ) -> list[str]:
+ """Reformat output column names from a native pivot operation, to match `polars`.
+
+ Note:
+ `column_names` is a `pd.MultiIndex`, but not in the stubs.
+ """
+ if n_on == 1:
+ return self._pivot_single_on_names(column_names, n_values, separator)
+ return list(self._pivot_multi_on_names(column_names, n_on, n_values, separator))
+
+ def _pivot_table(
+ self,
+ on: Sequence[str],
+ index: Sequence[str],
+ values: Sequence[str],
+ aggregate_function: Literal[
+ "min", "max", "first", "last", "sum", "mean", "median"
+ ],
+ /,
+ ) -> Any:
+ categorical = self._version.dtypes.Categorical
+ kwds: dict[Any, Any] = {"observed": True}
+ if self._implementation is Implementation.CUDF:
+ kwds.pop("observed")
+ cols = set(chain(values, index, on))
+ schema = self.schema.items()
+ if any(
+ tp for name, tp in schema if name in cols and isinstance(tp, categorical)
+ ):
+ msg = "`pivot` with Categoricals is not implemented for cuDF backend"
+ raise NotImplementedError(msg)
+ return self.native.pivot_table(
+ values=values,
+ index=index,
+ columns=on,
+ aggfunc=aggregate_function,
+ margins=False,
+ **kwds,
+ )
+
+ def _pivot(
+ self,
+ on: Sequence[str],
+ index: Sequence[str],
+ values: Sequence[str],
+ aggregate_function: PivotAgg | None,
+ /,
+ ) -> pd.DataFrame:
+ if aggregate_function is None:
+ return self.native.pivot(columns=on, index=index, values=values)
+ elif aggregate_function == "len":
+ return (
+ self.native.groupby([*on, *index], as_index=False)
+ .agg(dict.fromkeys(values, "size"))
+ .pivot(columns=on, index=index, values=values)
+ )
+ return self._pivot_table(on, index, values, aggregate_function)
+
+ def pivot(
+ self,
+ on: Sequence[str],
+ *,
+ index: Sequence[str] | None,
+ values: Sequence[str] | None,
+ aggregate_function: PivotAgg | None,
+ sort_columns: bool,
+ separator: str,
+ ) -> Self:
+ implementation = self._implementation
+ backend_version = self._backend_version
+ if implementation.is_pandas() and backend_version < (1, 1): # pragma: no cover
+ msg = "pivot is only supported for 'pandas>=1.1'"
+ raise NotImplementedError(msg)
+ if implementation.is_modin():
+ msg = "pivot is not supported for Modin backend due to https://github.com/modin-project/modin/issues/7409."
+ raise NotImplementedError(msg)
+
+ index, values = self._pivot_into_index_values(on, index, values)
+ result = self._pivot(on, index, values, aggregate_function)
+
+ # Select the columns in the right order
+ uniques = (
+ (
+ self.get_column(col)
+ .unique()
+ .sort(descending=False, nulls_last=False)
+ .to_list()
+ for col in on
+ )
+ if sort_columns
+ else (self.get_column(col).unique().to_list() for col in on)
+ )
+ ordered_cols = list(product(values, *chain(uniques)))
+ result = result.loc[:, ordered_cols]
+ columns = result.columns
+ remapped = self._pivot_remap_column_names(
+ columns, n_on=len(on), n_values=len(values), separator=separator
+ )
+ result.columns = remapped # type: ignore[assignment]
+ result.columns.names = [""]
+ return self._with_native(result.reset_index())
+
+ def to_arrow(self) -> Any:
+ if self._implementation is Implementation.CUDF:
+ return self.native.to_arrow(preserve_index=False)
+
+    import pyarrow as pa  # ignore-banned-import
+
+ return pa.Table.from_pandas(self.native)
+
+ def sample(
+ self,
+ n: int | None,
+ *,
+ fraction: float | None,
+ with_replacement: bool,
+ seed: int | None,
+ ) -> Self:
+ return self._with_native(
+ self.native.sample(
+ n=n, frac=fraction, replace=with_replacement, random_state=seed
+ ),
+ validate_column_names=False,
+ )
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ return self._with_native(
+ self.native.melt(
+ id_vars=index,
+ value_vars=on,
+ var_name=variable_name,
+ value_name=value_name,
+ )
+ )
+
+ def explode(self, columns: Sequence[str]) -> Self:
+ dtypes = self._version.dtypes
+
+ schema = self.collect_schema()
+ for col_to_explode in columns:
+ dtype = schema[col_to_explode]
+
+ if dtype != dtypes.List:
+ msg = (
+ f"`explode` operation not supported for dtype `{dtype}`, "
+ "expected List type"
+ )
+ raise InvalidOperationError(msg)
+
+ if len(columns) == 1:
+ return self._with_native(
+ self.native.explode(columns[0]), validate_column_names=False
+ )
+ else:
+ native_frame = self.native
+ anchor_series = native_frame[columns[0]].list.len()
+
+ if not all(
+ (native_frame[col_name].list.len() == anchor_series).all()
+ for col_name in columns[1:]
+ ):
+ msg = "exploded columns must have matching element counts"
+ raise ShapeError(msg)
+
+ original_columns = self.columns
+ other_columns = [c for c in original_columns if c not in columns]
+
+ exploded_frame = native_frame[[*other_columns, columns[0]]].explode(
+ columns[0]
+ )
+ exploded_series = [
+ native_frame[col_name].explode().to_frame() for col_name in columns[1:]
+ ]
+
+ plx = self.__native_namespace__()
+ return self._with_native(
+ plx.concat([exploded_frame, *exploded_series], axis=1)[original_columns],
+ validate_column_names=False,
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py
new file mode 100644
index 0000000..0cd9958
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py
@@ -0,0 +1,402 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import EagerExpr
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._pandas_like.group_by import PandasLikeGroupBy
+from narwhals._pandas_like.series import PandasLikeSeries
+from narwhals._utils import generate_temporary_column_name
+
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
+ from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
+ from narwhals._expression_parsing import ExprMetadata
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ NonNestedLiteral,
+ PythonLiteral,
+ RankMethod,
+ )
+
+WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT = {
+ "cum_sum": "cumsum",
+ "cum_min": "cummin",
+ "cum_max": "cummax",
+ "cum_prod": "cumprod",
+    # Pandas' cumcount starts counting from 0, while Polars starts from 1.
+    # Pandas' cumcount also counts nulls, while Polars does not.
+    # So instead of "cumcount" we use "cumsum" on notna() to get the same result.
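+    # e.g. notna() on [1, None, 2] gives [True, False, True], and its cumsum
+    # is [1, 1, 2], which matches Polars' cum_count.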
+ "cum_count": "cumsum",
+ "rolling_sum": "sum",
+ "rolling_mean": "mean",
+ "rolling_std": "std",
+ "rolling_var": "var",
+ "shift": "shift",
+ "rank": "rank",
+ "diff": "diff",
+ "fill_null": "fillna",
+}
+
+
+def window_kwargs_to_pandas_equivalent(
+ function_name: str, kwargs: ScalarKwargs
+) -> dict[str, PythonLiteral]:
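+    # e.g. ("shift", {"n": 2}) -> {"periods": 2}, ready to pass to `Series.shift`.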
+ if function_name == "shift":
+ assert "n" in kwargs # noqa: S101
+ pandas_kwargs: dict[str, PythonLiteral] = {"periods": kwargs["n"]}
+ elif function_name == "rank":
+ assert "method" in kwargs # noqa: S101
+ assert "descending" in kwargs # noqa: S101
+ _method = kwargs["method"]
+ pandas_kwargs = {
+ "method": "first" if _method == "ordinal" else _method,
+ "ascending": not kwargs["descending"],
+ "na_option": "keep",
+ "pct": False,
+ }
+ elif function_name.startswith("cum_"): # Cumulative operation
+ pandas_kwargs = {"skipna": True}
+ elif function_name.startswith("rolling_"): # Rolling operation
+ assert "min_samples" in kwargs # noqa: S101
+ assert "window_size" in kwargs # noqa: S101
+ assert "center" in kwargs # noqa: S101
+ pandas_kwargs = {
+ "min_periods": kwargs["min_samples"],
+ "window": kwargs["window_size"],
+ "center": kwargs["center"],
+ }
+ elif function_name in {"std", "var"}:
+ assert "ddof" in kwargs # noqa: S101
+ pandas_kwargs = {"ddof": kwargs["ddof"]}
+ elif function_name == "fill_null":
+ assert "strategy" in kwargs # noqa: S101
+ assert "limit" in kwargs # noqa: S101
+ pandas_kwargs = {"strategy": kwargs["strategy"], "limit": kwargs["limit"]}
+ else: # sum, len, ...
+ pandas_kwargs = {}
+ return pandas_kwargs
+
+
+class PandasLikeExpr(EagerExpr["PandasLikeDataFrame", PandasLikeSeries]):
+ def __init__(
+ self,
+ call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
+ *,
+ depth: int,
+ function_name: str,
+ evaluate_output_names: EvalNames[PandasLikeDataFrame],
+ alias_output_names: AliasNames | None,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+ scalar_kwargs: ScalarKwargs | None = None,
+ ) -> None:
+ self._call = call
+ self._depth = depth
+ self._function_name = function_name
+ self._evaluate_output_names = evaluate_output_names
+ self._alias_output_names = alias_output_names
+ self._implementation = implementation
+ self._backend_version = backend_version
+ self._version = version
+ self._scalar_kwargs = scalar_kwargs or {}
+ self._metadata: ExprMetadata | None = None
+
+ def __narwhals_namespace__(self) -> PandasLikeNamespace:
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+
+ return PandasLikeNamespace(
+ self._implementation, self._backend_version, version=self._version
+ )
+
+ def __narwhals_expr__(self) -> None: ...
+
+ @classmethod
+ def from_column_names(
+ cls: type[Self],
+ evaluate_column_names: EvalNames[PandasLikeDataFrame],
+ /,
+ *,
+ context: _FullContext,
+ function_name: str = "",
+ ) -> Self:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ try:
+ return [
+ PandasLikeSeries(
+ df._native_frame[column_name],
+ implementation=df._implementation,
+ backend_version=df._backend_version,
+ version=df._version,
+ )
+ for column_name in evaluate_column_names(df)
+ ]
+ except KeyError as e:
+ if error := df._check_columns_exist(evaluate_column_names(df)):
+ raise error from e
+ raise
+
+ return cls(
+ func,
+ depth=0,
+ function_name=function_name,
+ evaluate_output_names=evaluate_column_names,
+ alias_output_names=None,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ @classmethod
+ def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ native = df.native
+ return [
+ PandasLikeSeries.from_native(native.iloc[:, i], context=df)
+ for i in column_indices
+ ]
+
+ return cls(
+ func,
+ depth=0,
+ function_name="nth",
+ evaluate_output_names=cls._eval_names_indices(column_indices),
+ alias_output_names=None,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None,
+ span: float | None,
+ half_life: float | None,
+ alpha: float | None,
+ adjust: bool,
+ min_samples: int,
+ ignore_nulls: bool,
+ ) -> Self:
+ return self._reuse_series(
+ "ewm_mean",
+ com=com,
+ span=span,
+ half_life=half_life,
+ alpha=alpha,
+ adjust=adjust,
+ min_samples=min_samples,
+ ignore_nulls=ignore_nulls,
+ )
+
+ def cum_sum(self, *, reverse: bool) -> Self:
+ return self._reuse_series("cum_sum", scalar_kwargs={"reverse": reverse})
+
+ def shift(self, n: int) -> Self:
+ return self._reuse_series("shift", scalar_kwargs={"n": n})
+
+ def over( # noqa: C901, PLR0915
+ self, partition_by: Sequence[str], order_by: Sequence[str]
+ ) -> Self:
+ if not partition_by:
+ # e.g. `nw.col('a').cum_sum().order_by(key)`
+ # We can always easily support this as it doesn't require grouping.
+ assert order_by # noqa: S101
+
+ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
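+                # Record each row's original position, sort by `order_by`,
+                # evaluate, then scatter the results back into the original order.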
+ token = generate_temporary_column_name(8, df.columns)
+ df = df.with_row_index(token).sort(
+ *order_by, descending=False, nulls_last=False
+ )
+ results = self(df.drop([token], strict=True))
+ sorting_indices = df.get_column(token)
+ for s in results:
+ s._scatter_in_place(sorting_indices, s)
+ return results
+ elif not self._is_elementary():
+ msg = (
+ "Only elementary expressions are supported for `.over` in pandas-like backends.\n\n"
+ "Please see: "
+ "https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/"
+ )
+ raise NotImplementedError(msg)
+ else:
+ function_name = PandasLikeGroupBy._leaf_name(self)
+ pandas_function_name = WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT.get(
+ function_name, PandasLikeGroupBy._REMAP_AGGS.get(function_name)
+ )
+ if pandas_function_name is None:
+ msg = (
+ f"Unsupported function: {function_name} in `over` context.\n\n"
+ f"Supported functions are {', '.join(WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT)}\n"
+ f"and {', '.join(PandasLikeGroupBy._REMAP_AGGS)}."
+ )
+ raise NotImplementedError(msg)
+ pandas_kwargs = window_kwargs_to_pandas_equivalent(
+ function_name, self._scalar_kwargs
+ )
+
+ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, PLR0912
+ output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
+ if function_name == "cum_count":
+ plx = self.__narwhals_namespace__()
+ df = df.with_columns(~plx.col(*output_names).is_null())
+
+ if function_name.startswith("cum_"):
+ assert "reverse" in self._scalar_kwargs # noqa: S101
+ reverse = self._scalar_kwargs["reverse"]
+ else:
+ assert "reverse" not in self._scalar_kwargs # noqa: S101
+ reverse = False
+
+ if order_by:
+ columns = list(set(partition_by).union(output_names).union(order_by))
+ token = generate_temporary_column_name(8, columns)
+ df = (
+ df.simple_select(*columns)
+ .with_row_index(token)
+ .sort(*order_by, descending=reverse, nulls_last=reverse)
+ )
+ sorting_indices = df.get_column(token)
+ elif reverse:
+ columns = list(set(partition_by).union(output_names))
+ df = df.simple_select(*columns)._gather_slice(slice(None, None, -1))
+ grouped = df._native_frame.groupby(partition_by)
+ if function_name.startswith("rolling"):
+ rolling = grouped[list(output_names)].rolling(**pandas_kwargs)
+ assert pandas_function_name is not None # help mypy # noqa: S101
+ if pandas_function_name in {"std", "var"}:
+ assert "ddof" in self._scalar_kwargs # noqa: S101
+ res_native = getattr(rolling, pandas_function_name)(
+ ddof=self._scalar_kwargs["ddof"]
+ )
+ else:
+ res_native = getattr(rolling, pandas_function_name)()
+ elif function_name == "fill_null":
+ assert "strategy" in self._scalar_kwargs # noqa: S101
+ assert "limit" in self._scalar_kwargs # noqa: S101
+ df_grouped = grouped[list(output_names)]
+ if self._scalar_kwargs["strategy"] == "forward":
+ res_native = df_grouped.ffill(limit=self._scalar_kwargs["limit"])
+ elif self._scalar_kwargs["strategy"] == "backward":
+ res_native = df_grouped.bfill(limit=self._scalar_kwargs["limit"])
+ else: # pragma: no cover
+ # This is deprecated in pandas. Indeed, `nw.col('a').fill_null(3).over('b')`
+ # does not seem very useful, and DuckDB doesn't support it either.
+ msg = "`fill_null` with `over` without `strategy` specified is not supported."
+ raise NotImplementedError(msg)
+ elif function_name == "len":
+ if len(output_names) != 1: # pragma: no cover
+ msg = "Safety check failed, please report a bug."
+ raise AssertionError(msg)
+ res_native = grouped.transform("size").to_frame(aliases[0])
+ else:
+ res_native = grouped[list(output_names)].transform(
+ pandas_function_name, **pandas_kwargs
+ )
+ result_frame = df._with_native(res_native).rename(
+ dict(zip(output_names, aliases))
+ )
+ results = [result_frame.get_column(name) for name in aliases]
+ if order_by:
+ for s in results:
+ s._scatter_in_place(sorting_indices, s)
+ return results
+ if reverse:
+ return [s._gather_slice(slice(None, None, -1)) for s in results]
+ return results
+
+ return self.__class__(
+ func,
+ depth=self._depth + 1,
+ function_name=self._function_name + "->over",
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ return self._reuse_series("cum_count", scalar_kwargs={"reverse": reverse})
+
+ def cum_min(self, *, reverse: bool) -> Self:
+ return self._reuse_series("cum_min", scalar_kwargs={"reverse": reverse})
+
+ def cum_max(self, *, reverse: bool) -> Self:
+ return self._reuse_series("cum_max", scalar_kwargs={"reverse": reverse})
+
+ def cum_prod(self, *, reverse: bool) -> Self:
+ return self._reuse_series("cum_prod", scalar_kwargs={"reverse": reverse})
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self:
+ return self._reuse_series(
+ "fill_null", scalar_kwargs={"strategy": strategy, "limit": limit}, value=value
+ )
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._reuse_series(
+ "rolling_sum",
+ scalar_kwargs={
+ "window_size": window_size,
+ "min_samples": min_samples,
+ "center": center,
+ },
+ )
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._reuse_series(
+ "rolling_mean",
+ scalar_kwargs={
+ "window_size": window_size,
+ "min_samples": min_samples,
+ "center": center,
+ },
+ )
+
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._reuse_series(
+ "rolling_std",
+ scalar_kwargs={
+ "window_size": window_size,
+ "min_samples": min_samples,
+ "center": center,
+ "ddof": ddof,
+ },
+ )
+
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._reuse_series(
+ "rolling_var",
+ scalar_kwargs={
+ "window_size": window_size,
+ "min_samples": min_samples,
+ "center": center,
+ "ddof": ddof,
+ },
+ )
+
+ def rank(self, method: RankMethod, *, descending: bool) -> Self:
+ return self._reuse_series(
+ "rank", scalar_kwargs={"method": method, "descending": descending}
+ )
+
+ def log(self, base: float) -> Self:
+ return self._reuse_series("log", base=base)
+
+ def exp(self) -> Self:
+ return self._reuse_series("exp")
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py
new file mode 100644
index 0000000..ede3f05
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py
@@ -0,0 +1,293 @@
+from __future__ import annotations
+
+import collections
+import warnings
+from typing import TYPE_CHECKING, Any, ClassVar, Iterator, Mapping, Sequence
+
+from narwhals._compliant import EagerGroupBy
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._pandas_like.utils import select_columns_by_name
+from narwhals._utils import find_stacklevel
+
+if TYPE_CHECKING:
+ from narwhals._compliant.group_by import NarwhalsAggregation
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.expr import PandasLikeExpr
+
+
+class PandasLikeGroupBy(EagerGroupBy["PandasLikeDataFrame", "PandasLikeExpr", str]):
+ _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Any]] = {
+ "sum": "sum",
+ "mean": "mean",
+ "median": "median",
+ "max": "max",
+ "min": "min",
+ "std": "std",
+ "var": "var",
+ "len": "size",
+ "n_unique": "nunique",
+ "count": "count",
+ }
+
+ def __init__(
+ self,
+ df: PandasLikeDataFrame,
+ keys: Sequence[PandasLikeExpr] | Sequence[str],
+ /,
+ *,
+ drop_null_keys: bool,
+ ) -> None:
+ self._df = df
+ self._drop_null_keys = drop_null_keys
+ self._compliant_frame, self._keys, self._output_key_names = self._parse_keys(
+ df, keys=keys
+ )
+ # Drop index to avoid potential collisions:
+ # https://github.com/narwhals-dev/narwhals/issues/1907.
+ if set(self.compliant.native.index.names).intersection(self.compliant.columns):
+ native_frame = self.compliant.native.reset_index(drop=True)
+ else:
+ native_frame = self.compliant.native
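+                # Emulate an anti-join: outer-merge with an indicator column,
+                # then keep only the rows that appear exclusively on the left.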
+ if (
+ self.compliant._implementation.is_pandas()
+ and self.compliant._backend_version < (1, 1)
+ ): # pragma: no cover
+ if (
+ not drop_null_keys
+ and self.compliant.simple_select(*self._keys).native.isna().any().any()
+ ):
+ msg = "Grouping by null values is not supported in pandas < 1.1.0"
+ raise NotImplementedError(msg)
+ self._grouped = native_frame.groupby(
+ list(self._keys), sort=False, as_index=True, observed=True
+ )
+ else:
+ self._grouped = native_frame.groupby(
+ list(self._keys),
+ sort=False,
+ as_index=True,
+ dropna=drop_null_keys,
+ observed=True,
+ )
+
+ def agg(self, *exprs: PandasLikeExpr) -> PandasLikeDataFrame: # noqa: C901, PLR0912, PLR0914, PLR0915
+ implementation = self.compliant._implementation
+ backend_version = self.compliant._backend_version
+ new_names: list[str] = self._keys.copy()
+
+ all_aggs_are_simple = True
+ exclude = (*self._keys, *self._output_key_names)
+ for expr in exprs:
+ _, aliases = evaluate_output_names_and_aliases(expr, self.compliant, exclude)
+ new_names.extend(aliases)
+ if not self._is_simple(expr):
+ all_aggs_are_simple = False
+
+ # dict of {output_name: root_name} that we count n_unique on
+ # We need to do this separately from the rest so that we
+    # can pass the `dropna` kwarg.
+ nunique_aggs: dict[str, str] = {}
+ simple_aggs: dict[str, list[str]] = collections.defaultdict(list)
+ simple_aggs_functions: set[str] = set()
+
+ # ddof to (output_names, aliases) mapping
+ std_aggs: dict[int, tuple[list[str], list[str]]] = collections.defaultdict(
+ lambda: ([], [])
+ )
+ var_aggs: dict[int, tuple[list[str], list[str]]] = collections.defaultdict(
+ lambda: ([], [])
+ )
+
+ expected_old_names: list[str] = []
+ simple_agg_new_names: list[str] = []
+
+ if all_aggs_are_simple: # noqa: PLR1702
+ for expr in exprs:
+ output_names, aliases = evaluate_output_names_and_aliases(
+ expr, self.compliant, exclude
+ )
+ if expr._depth == 0:
+ # e.g. `agg(nw.len())`
+ function_name = self._remap_expr_name(expr._function_name)
+ simple_aggs_functions.add(function_name)
+
+ for alias in aliases:
+ expected_old_names.append(f"{self._keys[0]}_{function_name}")
+ simple_aggs[self._keys[0]].append(function_name)
+ simple_agg_new_names.append(alias)
+ continue
+
+ # e.g. `agg(nw.mean('a'))`
+ function_name = self._remap_expr_name(self._leaf_name(expr))
+ is_n_unique = function_name == "nunique"
+ is_std = function_name == "std"
+ is_var = function_name == "var"
+ for output_name, alias in zip(output_names, aliases):
+ if is_n_unique:
+ nunique_aggs[alias] = output_name
+ elif is_std and (ddof := expr._scalar_kwargs["ddof"]) != 1: # pyright: ignore[reportTypedDictNotRequiredAccess]
+ std_aggs[ddof][0].append(output_name)
+ std_aggs[ddof][1].append(alias)
+ elif is_var and (ddof := expr._scalar_kwargs["ddof"]) != 1: # pyright: ignore[reportTypedDictNotRequiredAccess]
+ var_aggs[ddof][0].append(output_name)
+ var_aggs[ddof][1].append(alias)
+ else:
+ expected_old_names.append(f"{output_name}_{function_name}")
+ simple_aggs[output_name].append(function_name)
+ simple_agg_new_names.append(alias)
+ simple_aggs_functions.add(function_name)
+
+ result_aggs = []
+
+ if simple_aggs:
+ # Fast path for single aggregation such as `df.groupby(...).mean()`
+ if (
+ len(simple_aggs_functions) == 1
+ and (agg_method := simple_aggs_functions.pop()) != "size"
+ and len(simple_aggs) > 1
+ ):
+ result_simple_aggs = getattr(
+ self._grouped[list(simple_aggs.keys())], agg_method
+ )()
+ result_simple_aggs.columns = [
+ f"{a}_{agg_method}" for a in result_simple_aggs.columns
+ ]
+ else:
+ result_simple_aggs = self._grouped.agg(simple_aggs)
+ result_simple_aggs.columns = [
+ f"{a}_{b}" for a, b in result_simple_aggs.columns
+ ]
+ if not (
+ set(result_simple_aggs.columns) == set(expected_old_names)
+ and len(result_simple_aggs.columns) == len(expected_old_names)
+ ): # pragma: no cover
+ msg = (
+ f"Safety assertion failed, expected {expected_old_names} "
+ f"got {result_simple_aggs.columns}, "
+ "please report a bug at https://github.com/narwhals-dev/narwhals/issues"
+ )
+ raise AssertionError(msg)
+
+ # Rename columns, being very careful
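+                # Duplicate names in `expected_old_names` are matched
+                # positionally, left to right, to the columns pandas returned.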
+ expected_old_names_indices: dict[str, list[int]] = (
+ collections.defaultdict(list)
+ )
+ for idx, item in enumerate(expected_old_names):
+ expected_old_names_indices[item].append(idx)
+ index_map: list[int] = [
+ expected_old_names_indices[item].pop(0)
+ for item in result_simple_aggs.columns
+ ]
+ result_simple_aggs.columns = [simple_agg_new_names[i] for i in index_map]
+ result_aggs.append(result_simple_aggs)
+
+ if nunique_aggs:
+ result_nunique_aggs = self._grouped[list(nunique_aggs.values())].nunique(
+ dropna=False
+ )
+ result_nunique_aggs.columns = list(nunique_aggs.keys())
+
+ result_aggs.append(result_nunique_aggs)
+
+ if std_aggs:
+ for ddof, (std_output_names, std_aliases) in std_aggs.items():
+ _aggregation = self._grouped[std_output_names].std(ddof=ddof)
+                # `_aggregation` is a new object, so it's OK to operate in place.
+ _aggregation.columns = std_aliases
+ result_aggs.append(_aggregation)
+ if var_aggs:
+ for ddof, (var_output_names, var_aliases) in var_aggs.items():
+ _aggregation = self._grouped[var_output_names].var(ddof=ddof)
+                # `_aggregation` is a new object, so it's OK to operate in place.
+ _aggregation.columns = var_aliases
+ result_aggs.append(_aggregation)
+
+ if result_aggs:
+ output_names_counter = collections.Counter(
+ c for frame in result_aggs for c in frame
+ )
+ if any(v > 1 for v in output_names_counter.values()):
+ msg = ""
+ for key, value in output_names_counter.items():
+ if value > 1:
+ msg += f"\n- '{key}' {value} times"
+ else: # pragma: no cover
+ pass
+ msg = f"Expected unique output names, got:{msg}"
+ raise ValueError(msg)
+ namespace = self.compliant.__narwhals_namespace__()
+ result = namespace._concat_horizontal(result_aggs)
+ else:
+ # No aggregation provided
+ result = self.compliant.__native_namespace__().DataFrame(
+ list(self._grouped.groups.keys()), columns=self._keys
+ )
+ # Keep inplace=True to avoid making a redundant copy.
+ # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
+ result.reset_index(inplace=True) # noqa: PD002
+ return self.compliant._with_native(
+ select_columns_by_name(result, new_names, backend_version, implementation)
+ ).rename(dict(zip(self._keys, self._output_key_names)))
+
+ if self.compliant.native.empty:
+ # Don't even attempt this, it's way too inconsistent across pandas versions.
+ msg = (
+ "No results for group-by aggregation.\n\n"
+ "Hint: you were probably trying to apply a non-elementary aggregation with a "
+ "pandas-like API.\n"
+ "Please rewrite your query such that group-by aggregations "
+ "are elementary. For example, instead of:\n\n"
+ " df.group_by('a').agg(nw.col('b').round(2).mean())\n\n"
+ "use:\n\n"
+ " df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())\n\n"
+ )
+ raise ValueError(msg)
+
+ warnings.warn(
+ "Found complex group-by expression, which can't be expressed efficiently with the "
+ "pandas API. If you can, please rewrite your query such that group-by aggregations "
+ "are simple (e.g. mean, std, min, max, ...). \n\n"
+ "Please see: "
+ "https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/",
+ UserWarning,
+ stacklevel=find_stacklevel(),
+ )
+
+ def func(df: Any) -> Any:
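+ # Evaluate every expression against this single group; each result is an
+ # aggregation of length 1, so its first element becomes one cell of the
+ # output row for this group.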
+ out_group = []
+ out_names = []
+ for expr in exprs:
+ results_keys = expr(self.compliant._with_native(df))
+ for result_keys in results_keys:
+ out_group.append(result_keys.native.iloc[0])
+ out_names.append(result_keys.name)
+ ns = self.compliant.__narwhals_namespace__()
+ return ns._series.from_iterable(out_group, index=out_names, context=ns).native
+
+ if implementation.is_pandas() and backend_version >= (2, 2):
+ result_complex = self._grouped.apply(func, include_groups=False)
+ else: # pragma: no cover
+ result_complex = self._grouped.apply(func)
+
+ # Keep inplace=True to avoid making a redundant copy.
+ # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
+ result_complex.reset_index(inplace=True) # noqa: PD002
+ return self.compliant._with_native(
+ select_columns_by_name(
+ result_complex, new_names, backend_version, implementation
+ )
+ ).rename(dict(zip(self._keys, self._output_key_names)))
+
+ def __iter__(self) -> Iterator[tuple[Any, PandasLikeDataFrame]]:
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ message=".*a length 1 tuple will be returned",
+ category=FutureWarning,
+ )
+
+ for key, group in self._grouped:
+ yield (
+ key,
+ self.compliant._with_native(group).simple_select(*self._df.columns),
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py
new file mode 100644
index 0000000..5612c85
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py
@@ -0,0 +1,332 @@
+from __future__ import annotations
+
+import operator
+import warnings
+from functools import reduce
+from typing import TYPE_CHECKING, Literal, Sequence
+
+import pandas as pd
+
+from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+from narwhals._pandas_like.expr import PandasLikeExpr
+from narwhals._pandas_like.selectors import PandasSelectorNamespace
+from narwhals._pandas_like.series import PandasLikeSeries
+from narwhals._pandas_like.utils import align_series_full_broadcast
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.typing import NDFrameT
+ from narwhals._utils import Implementation, Version
+ from narwhals.typing import IntoDType, NonNestedLiteral
+
+VERTICAL: Literal[0] = 0
+HORIZONTAL: Literal[1] = 1
+
+
+class PandasLikeNamespace(
+ EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr, pd.DataFrame]
+):
+ @property
+ def _dataframe(self) -> type[PandasLikeDataFrame]:
+ return PandasLikeDataFrame
+
+ @property
+ def _expr(self) -> type[PandasLikeExpr]:
+ return PandasLikeExpr
+
+ @property
+ def _series(self) -> type[PandasLikeSeries]:
+ return PandasLikeSeries
+
+ @property
+ def selectors(self) -> PandasSelectorNamespace:
+ return PandasSelectorNamespace.from_namespace(self)
+
+ # --- not in spec ---
+ def __init__(
+ self,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._implementation = implementation
+ self._backend_version = backend_version
+ self._version = version
+
+ def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
+ def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
+ pandas_series = self._series.from_iterable(
+ data=[value],
+ name="literal",
+ index=df._native_frame.index[0:1],
+ context=self,
+ )
+ if dtype:
+ return pandas_series.cast(dtype)
+ return pandas_series
+
+ return PandasLikeExpr(
+ lambda df: [_lit_pandas_series(df)],
+ depth=0,
+ function_name="lit",
+ evaluate_output_names=lambda _df: ["literal"],
+ alias_output_names=None,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def len(self) -> PandasLikeExpr:
+ return PandasLikeExpr(
+ lambda df: [
+ self._series.from_iterable(
+ [len(df._native_frame)], name="len", index=[0], context=self
+ )
+ ],
+ depth=0,
+ function_name="len",
+ evaluate_output_names=lambda _df: ["len"],
+ alias_output_names=None,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ # --- horizontal ---
+ def sum_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ series = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(*series)
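+ # nulls are filled with 0 first, so a missing value contributes nothing
+ # to the row-wise sum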
+ native_series = (s.fill_null(0, None, None) for s in series)
+ return [reduce(operator.add, native_series)]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="sum_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ def all_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ series = align_series_full_broadcast(
+ *(s for _expr in exprs for s in _expr(df))
+ )
+ return [reduce(operator.and_, series)]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="all_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ def any_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ series = align_series_full_broadcast(
+ *(s for _expr in exprs for s in _expr(df))
+ )
+ return [reduce(operator.or_, series)]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="any_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ def mean_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ expr_results = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(
+ *(s.fill_null(0, strategy=None, limit=None) for s in expr_results)
+ )
+ non_na = align_series_full_broadcast(*(1 - s.is_null() for s in expr_results))
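+ # row-wise mean = (sum with nulls counted as 0) / (count of non-null values)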
+ return [reduce(operator.add, series) / reduce(operator.add, non_na)]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="mean_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ def min_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ series = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(*series)
+
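+ # concatenate the aligned series into one frame and take the row-wise
+ # minimum; the result is named after the first input series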
+ return [
+ PandasLikeSeries(
+ self.concat(
+ (s.to_frame() for s in series), how="horizontal"
+ )._native_frame.min(axis=1),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ ).alias(series[0].name)
+ ]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="min_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ def max_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ series = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(*series)
+
+ return [
+ PandasLikeSeries(
+ self.concat(
+ (s.to_frame() for s in series), how="horizontal"
+ )._native_frame.max(axis=1),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ ).alias(series[0].name)
+ ]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="max_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+ @property
+ def _concat(self): # type: ignore[no-untyped-def] # noqa: ANN202
+ """Return the **native** equivalent of `pd.concat`."""
+ # NOTE: Leave un-annotated to allow `@overload` matching via inference.
+ if TYPE_CHECKING:
+ import pandas as pd
+
+ return pd.concat
+ return self._implementation.to_native_namespace().concat
+
+ def _concat_diagonal(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
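+ # `copy=False` is only valid on pandas < 3, where it avoids an eager copy;
+ # pandas < 1.0 additionally needs `sort=False` to keep the no-sort behaviour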
+ if self._implementation.is_pandas() and self._backend_version < (3,):
+ if self._backend_version < (1,):
+ return self._concat(dfs, axis=VERTICAL, copy=False, sort=False)
+ return self._concat(dfs, axis=VERTICAL, copy=False)
+ return self._concat(dfs, axis=VERTICAL)
+
+ def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame:
+ if self._implementation.is_cudf():
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ message="The behavior of array concatenation with empty entries is deprecated",
+ category=FutureWarning,
+ )
+ return self._concat(dfs, axis=HORIZONTAL)
+ elif self._implementation.is_pandas() and self._backend_version < (3,):
+ return self._concat(dfs, axis=HORIZONTAL, copy=False)
+ return self._concat(dfs, axis=HORIZONTAL)
+
+ def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
+ cols_0 = dfs[0].columns
+ for i, df in enumerate(dfs[1:], start=1):
+ cols_current = df.columns
+ if not (
+ (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()
+ ):
+ msg = (
+ "unable to vstack, column names don't match:\n"
+ f" - dataframe 0: {cols_0.to_list()}\n"
+ f" - dataframe {i}: {cols_current.to_list()}\n"
+ )
+ raise TypeError(msg)
+ if self._implementation.is_pandas() and self._backend_version < (3,):
+ return self._concat(dfs, axis=VERTICAL, copy=False)
+ return self._concat(dfs, axis=VERTICAL)
+
+ def when(self, predicate: PandasLikeExpr) -> PandasWhen:
+ return PandasWhen.from_expr(predicate, context=self)
+
+ def concat_str(
+ self, *exprs: PandasLikeExpr, separator: str, ignore_nulls: bool
+ ) -> PandasLikeExpr:
+ string = self._version.dtypes.String()
+
+ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+ expr_results = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(*(s.cast(string) for s in expr_results))
+ null_mask = align_series_full_broadcast(*(s.is_null() for s in expr_results))
+
+ if not ignore_nulls:
+ null_mask_result = reduce(operator.or_, null_mask)
+ result = reduce(lambda x, y: x + separator + y, series).zip_with(
+ ~null_mask_result, None
+ )
+ else:
+ init_value, *values = [
+ s.zip_with(~nm, "") for s, nm in zip(series, null_mask)
+ ]
+
+ sep_array = init_value.from_iterable(
+ data=[separator] * len(init_value),
+ name="sep",
+ index=init_value.native.index,
+ context=self,
+ )
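+ # the separators themselves are masked by the element null-masks, so null
+ # entries contribute neither their value nor any separator text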
+ separators = (sep_array.zip_with(~nm, "") for nm in null_mask[:-1])
+ result = reduce(
+ operator.add, (s + v for s, v in zip(separators, values)), init_value
+ )
+
+ return [result]
+
+ return self._expr._from_callable(
+ func=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="concat_str",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ context=self,
+ )
+
+
+class PandasWhen(EagerWhen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr]):
+ @property
+ def _then(self) -> type[PandasThen]:
+ return PandasThen
+
+ def _if_then_else(
+ self,
+ when: PandasLikeSeries,
+ then: PandasLikeSeries,
+ otherwise: PandasLikeSeries | None,
+ /,
+ ) -> PandasLikeSeries:
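+ # `Series.where` keeps `then` where the mask is true; elsewhere it inserts
+ # `otherwise` (or a missing value when no `otherwise` is given)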
+ if otherwise is None:
+ when, then = align_series_full_broadcast(when, then)
+ res_native = then.native.where(when.native)
+ else:
+ when, then, otherwise = align_series_full_broadcast(when, then, otherwise)
+ res_native = then.native.where(when.native, otherwise.native)
+ return then._with_native(res_native)
+
+
+class PandasThen(
+ CompliantThen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr], PandasLikeExpr
+): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py
new file mode 100644
index 0000000..f6b2a73
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, EagerSelectorNamespace
+from narwhals._pandas_like.expr import PandasLikeExpr
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame # noqa: F401
+ from narwhals._pandas_like.series import PandasLikeSeries # noqa: F401
+
+
+class PandasSelectorNamespace(
+ EagerSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"]
+):
+ @property
+ def _selector(self) -> type[PandasSelector]:
+ return PandasSelector
+
+
+class PandasSelector( # type: ignore[misc]
+ CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
+):
+ def _to_expr(self) -> PandasLikeExpr:
+ return PandasLikeExpr(
+ self._call,
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py
new file mode 100644
index 0000000..0ea4e83
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py
@@ -0,0 +1,1109 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence, cast
+
+import numpy as np
+
+from narwhals._compliant import EagerSeries
+from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace
+from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace
+from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace
+from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace
+from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace
+from narwhals._pandas_like.utils import (
+ align_and_extract_native,
+ get_dtype_backend,
+ narwhals_to_native_dtype,
+ native_to_narwhals_dtype,
+ object_native_to_narwhals_dtype,
+ rename,
+ select_columns_by_name,
+ set_index,
+)
+from narwhals._utils import (
+ Implementation,
+ is_list_of,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series
+from narwhals.exceptions import InvalidOperationError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import Hashable
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._arrow.typing import ChunkedArrayAny
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ Into1DArray,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ SizedMultiIndexSelector,
+ TemporalLiteral,
+ _1DArray,
+ _AnyDArray,
+ _SliceIndex,
+ )
+
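+ # Mappings from pandas nullable / pyarrow dtype strings to the numpy dtype used
+ # by `to_numpy`: the first applies when no values are missing; the second widens
+ # integers to floats so that missing values can be represented as NaN.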
+PANDAS_TO_NUMPY_DTYPE_NO_MISSING = {
+ "Int64": "int64",
+ "int64[pyarrow]": "int64",
+ "Int32": "int32",
+ "int32[pyarrow]": "int32",
+ "Int16": "int16",
+ "int16[pyarrow]": "int16",
+ "Int8": "int8",
+ "int8[pyarrow]": "int8",
+ "UInt64": "uint64",
+ "uint64[pyarrow]": "uint64",
+ "UInt32": "uint32",
+ "uint32[pyarrow]": "uint32",
+ "UInt16": "uint16",
+ "uint16[pyarrow]": "uint16",
+ "UInt8": "uint8",
+ "uint8[pyarrow]": "uint8",
+ "Float64": "float64",
+ "float64[pyarrow]": "float64",
+ "Float32": "float32",
+ "float32[pyarrow]": "float32",
+}
+PANDAS_TO_NUMPY_DTYPE_MISSING = {
+ "Int64": "float64",
+ "int64[pyarrow]": "float64",
+ "Int32": "float64",
+ "int32[pyarrow]": "float64",
+ "Int16": "float64",
+ "int16[pyarrow]": "float64",
+ "Int8": "float64",
+ "int8[pyarrow]": "float64",
+ "UInt64": "float64",
+ "uint64[pyarrow]": "float64",
+ "UInt32": "float64",
+ "uint32[pyarrow]": "float64",
+ "UInt16": "float64",
+ "uint16[pyarrow]": "float64",
+ "UInt8": "float64",
+ "uint8[pyarrow]": "float64",
+ "Float64": "float64",
+ "float64[pyarrow]": "float64",
+ "Float32": "float32",
+ "float32[pyarrow]": "float32",
+}
+
+
+class PandasLikeSeries(EagerSeries[Any]):
+ def __init__(
+ self,
+ native_series: Any,
+ *,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._name = native_series.name
+ self._native_series = native_series
+ self._implementation = implementation
+ self._backend_version = backend_version
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+ # Flag which indicates if, in the final step before applying an operation,
+ # the single value behind the PandasLikeSeries should be extracted and treated
+ # as a scalar. For example, in `nw.col('a') - nw.lit(3)`, the latter would
+ # become a Series of length 1. Rather than doing a full broadcast so that it
+ # matches the length of the whole dataframe, we just extract the scalar.
+ self._broadcast = False
+
+ @property
+ def native(self) -> Any:
+ return self._native_series
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation.is_pandas_like():
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __narwhals_namespace__(self) -> PandasLikeNamespace:
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+
+ return PandasLikeNamespace(
+ self._implementation, self._backend_version, self._version
+ )
+
+ def _gather(self, rows: SizedMultiIndexSelector[pd.Series[Any]]) -> Self:
+ rows = list(rows) if isinstance(rows, tuple) else rows
+ return self._with_native(self.native.iloc[rows])
+
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[slice(rows.start, rows.stop, rows.step)]
+ )
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=version,
+ )
+
+ def _with_native(self, series: Any, *, preserve_broadcast: bool = False) -> Self:
+ result = self.__class__(
+ series,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+ if preserve_broadcast:
+ result._broadcast = self._broadcast
+ return result
+
+ @classmethod
+ def from_iterable(
+ cls,
+ data: Iterable[Any],
+ *,
+ context: _FullContext,
+ name: str = "",
+ dtype: IntoDType | None = None,
+ index: Any = None,
+ ) -> Self:
+ implementation = context._implementation
+ backend_version = context._backend_version
+ version = context._version
+ ns = implementation.to_native_namespace()
+ kwds: dict[str, Any] = {}
+ if dtype:
+ kwds["dtype"] = narwhals_to_native_dtype(
+ dtype, None, implementation, backend_version, version
+ )
+ else:
+ if implementation.is_pandas():
+ kwds["copy"] = False
+ if index is not None and len(index):
+ kwds["index"] = index
+ return cls.from_native(ns.Series(data, name=name, **kwds), context=context)
+
+ @staticmethod
+ def _is_native(obj: Any) -> TypeIs[Any]:
+ return is_pandas_like_series(obj) # pragma: no cover
+
+ @classmethod
+ def from_native(cls, data: Any, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ @classmethod
+ def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
+ implementation = context._implementation
+ arr = data if is_numpy_array_1d(data) else [data]
+ native = implementation.to_native_namespace().Series(arr, name="")
+ return cls.from_native(native, context=context)
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def dtype(self) -> DType:
+ native_dtype = self.native.dtype
+ return (
+ native_to_narwhals_dtype(native_dtype, self._version, self._implementation)
+ if native_dtype != "object"
+ else object_native_to_narwhals_dtype(
+ self.native, self._version, self._implementation
+ )
+ )
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None,
+ span: float | None,
+ half_life: float | None,
+ alpha: float | None,
+ adjust: bool,
+ min_samples: int,
+ ignore_nulls: bool,
+ ) -> PandasLikeSeries:
+ ser = self.native
+ mask_na = ser.isna()
+ if self._implementation is Implementation.CUDF:
+ if (min_samples == 0 and not ignore_nulls) or (not mask_na.any()):
+ result = ser.ewm(
+ com=com, span=span, halflife=half_life, alpha=alpha, adjust=adjust
+ ).mean()
+ else:
+ msg = (
+ "cuDF only supports `ewm_mean` when there are no missing values "
+ "or when both `min_period=0` and `ignore_nulls=False`"
+ )
+ raise NotImplementedError(msg)
+ else:
+ result = ser.ewm(
+ com, span, half_life, alpha, min_samples, adjust, ignore_na=ignore_nulls
+ ).mean()
+ result[mask_na] = None
+ return self._with_native(result)
+
+ def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
+ if isinstance(values, self.__class__):
+ values = set_index(
+ values.native,
+ self.native.index[indices],
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ s = self.native.copy(deep=True)
+ s.iloc[indices] = values
+ s.name = self.name
+ return self._with_native(s)
+
+ def _scatter_in_place(self, indices: Self, values: Self) -> None:
+ # Scatter, modifying original Series. Use with care!
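+ # Re-index the incoming values to the target positions first; otherwise
+ # pandas would align on the existing index and scatter to the wrong rows.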
+ values_native = set_index(
+ values.native,
+ self.native.index[indices.native],
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ if self._implementation is Implementation.PANDAS and parse_version(np) < (2,):
+ values_native = values_native.copy() # pragma: no cover
+ min_pd_version = (1, 2)
+ if (
+ self._implementation is Implementation.PANDAS
+ and self._backend_version < min_pd_version
+ ):
+ self.native.iloc[indices.native.values] = values_native # noqa: PD011
+ else:
+ self.native.iloc[indices.native] = values_native
+
+ def cast(self, dtype: IntoDType) -> Self:
+ pd_dtype = narwhals_to_native_dtype(
+ dtype,
+ dtype_backend=get_dtype_backend(self.native.dtype, self._implementation),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+ return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True)
+
+ def item(self, index: int | None) -> Any:
+ # cuDF doesn't have Series.item().
+ if index is None:
+ if len(self) != 1:
+ msg = (
+ "can only call '.item()' if the Series is of length 1,"
+ f" or an explicit index is provided (Series is of length {len(self)})"
+ )
+ raise ValueError(msg)
+ return self.native.iloc[0]
+ return self.native.iloc[index]
+
+ def to_frame(self) -> PandasLikeDataFrame:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ self.native.to_frame(),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ def to_list(self) -> list[Any]:
+ is_cudf = self._implementation.is_cudf()
+ return self.native.to_arrow().to_pylist() if is_cudf else self.native.to_list()
+
+ def is_between(
+ self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
+ ) -> Self:
+ ser = self.native
+ _, lower_bound = align_and_extract_native(self, lower_bound)
+ _, upper_bound = align_and_extract_native(self, upper_bound)
+ if closed == "left":
+ res = ser.ge(lower_bound) & ser.lt(upper_bound)
+ elif closed == "right":
+ res = ser.gt(lower_bound) & ser.le(upper_bound)
+ elif closed == "none":
+ res = ser.gt(lower_bound) & ser.lt(upper_bound)
+ elif closed == "both":
+ res = ser.ge(lower_bound) & ser.le(upper_bound)
+ else: # pragma: no cover
+ raise AssertionError
+ return self._with_native(res).alias(ser.name)
+
+ def is_in(self, other: Any) -> PandasLikeSeries:
+ return self._with_native(self.native.isin(other))
+
+ def arg_true(self) -> PandasLikeSeries:
+ ser = self.native
+ result = ser.__class__(range(len(ser)), name=ser.name, index=ser.index).loc[ser]
+ return self._with_native(result)
+
+ def arg_min(self) -> int:
+ if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
+ return self.native.to_numpy().argmin()
+ return self.native.argmin()
+
+ def arg_max(self) -> int:
+ ser = self.native
+ if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
+ return ser.to_numpy().argmax()
+ return ser.argmax()
+
+ # Binary comparisons
+
+ def filter(self, predicate: Any) -> PandasLikeSeries:
+ if not is_list_of(predicate, bool):
+ _, other_native = align_and_extract_native(self, predicate)
+ else:
+ other_native = predicate
+ return self._with_native(self.native.loc[other_native]).alias(self.name)
+
+ def __eq__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser == other).alias(self.name)
+
+ def __ne__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser != other).alias(self.name)
+
+ def __ge__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser >= other).alias(self.name)
+
+ def __gt__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser > other).alias(self.name)
+
+ def __le__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser <= other).alias(self.name)
+
+ def __lt__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser < other).alias(self.name)
+
+ def __and__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser & other).alias(self.name)
+
+ def __rand__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ ser = cast("pd.Series[Any]", ser)
+ return self._with_native(ser.__and__(other)).alias(self.name)
+
+ def __or__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser | other).alias(self.name)
+
+ def __ror__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ ser = cast("pd.Series[Any]", ser)
+ return self._with_native(ser.__or__(other)).alias(self.name)
+
+ def __add__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser + other).alias(self.name)
+
+ def __radd__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__radd__(other_native)).alias(self.name)
+
+ def __sub__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser - other).alias(self.name)
+
+ def __rsub__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rsub__(other_native)).alias(self.name)
+
+ def __mul__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser * other).alias(self.name)
+
+ def __rmul__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rmul__(other_native)).alias(self.name)
+
+ def __truediv__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser / other).alias(self.name)
+
+ def __rtruediv__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rtruediv__(other_native)).alias(self.name)
+
+ def __floordiv__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser // other).alias(self.name)
+
+ def __rfloordiv__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rfloordiv__(other_native)).alias(self.name)
+
+ def __pow__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser**other).alias(self.name)
+
+ def __rpow__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rpow__(other_native)).alias(self.name)
+
+ def __mod__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser % other).alias(self.name)
+
+ def __rmod__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rmod__(other_native)).alias(self.name)
+
+ # Unary
+
+ def __invert__(self: PandasLikeSeries) -> PandasLikeSeries:
+ return self._with_native(~self.native)
+
+ # Reductions
+
+ def any(self) -> bool:
+ return self.native.any()
+
+ def all(self) -> bool:
+ return self.native.all()
+
+ def min(self) -> Any:
+ return self.native.min()
+
+ def max(self) -> Any:
+ return self.native.max()
+
+ def sum(self) -> float:
+ return self.native.sum()
+
+ def count(self) -> int:
+ return self.native.count()
+
+ def mean(self) -> float:
+ return self.native.mean()
+
+ def median(self) -> float:
+ if not self.dtype.is_numeric():
+ msg = "`median` operation not supported for non-numeric input type."
+ raise InvalidOperationError(msg)
+ return self.native.median()
+
+ def std(self, *, ddof: int) -> float:
+ return self.native.std(ddof=ddof)
+
+ def var(self, *, ddof: int) -> float:
+ return self.native.var(ddof=ddof)
+
+ def skew(self) -> float | None:
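+ # population (biased) skewness: g1 = m3 / m2**1.5, computed from central
+ # moments, rather than pandas' sample-adjusted `Series.skew`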
+ ser_not_null = self.native.dropna()
+ if len(ser_not_null) == 0:
+ return None
+ elif len(ser_not_null) == 1:
+ return float("nan")
+ elif len(ser_not_null) == 2:
+ return 0.0
+ else:
+ m = ser_not_null - ser_not_null.mean()
+ m2 = (m**2).mean()
+ m3 = (m**3).mean()
+ return m3 / (m2**1.5) if m2 != 0 else float("nan")
+
+ def len(self) -> int:
+ return len(self.native)
+
+ # Transformations
+
+ def is_null(self) -> PandasLikeSeries:
+ return self._with_native(self.native.isna(), preserve_broadcast=True)
+
+ def is_nan(self) -> PandasLikeSeries:
+ ser = self.native
+ if self.dtype.is_numeric():
+ return self._with_native(ser != ser, preserve_broadcast=True) # noqa: PLR0124
+ msg = f"`.is_nan` only supported for numeric dtype and not {self.dtype}, did you mean `.is_null`?"
+ raise InvalidOperationError(msg)
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self:
+ ser = self.native
+ if value is not None:
+ _, native_value = align_and_extract_native(self, value)
+ res_ser = self._with_native(
+ ser.fillna(value=native_value), preserve_broadcast=True
+ )
+ else:
+ res_ser = self._with_native(
+ ser.ffill(limit=limit)
+ if strategy == "forward"
+ else ser.bfill(limit=limit),
+ preserve_broadcast=True,
+ )
+
+ return res_ser
+
+ def drop_nulls(self) -> PandasLikeSeries:
+ return self._with_native(self.native.dropna())
+
+ def n_unique(self) -> int:
+ return self.native.nunique(dropna=False)
+
+ def sample(
+ self,
+ n: int | None,
+ *,
+ fraction: float | None,
+ with_replacement: bool,
+ seed: int | None,
+ ) -> Self:
+ return self._with_native(
+ self.native.sample(
+ n=n, frac=fraction, replace=with_replacement, random_state=seed
+ )
+ )
+
+ def abs(self) -> PandasLikeSeries:
+ return self._with_native(self.native.abs())
+
+ def cum_sum(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cumsum(skipna=True)
+ if not reverse
+ else self.native[::-1].cumsum(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def unique(self, *, maintain_order: bool = True) -> PandasLikeSeries:
+ """Pandas always maintains order, as per its docstring.
+
+ > Uniques are returned in order of appearance.
+ """
+ return self._with_native(
+ self.native.__class__(self.native.unique(), name=self.name)
+ )
+
+ def diff(self) -> PandasLikeSeries:
+ return self._with_native(self.native.diff())
+
+ def shift(self, n: int) -> PandasLikeSeries:
+ return self._with_native(self.native.shift(n))
+
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any],
+ *,
+ return_dtype: IntoDType | None,
+ ) -> PandasLikeSeries:
+ tmp_name = f"{self.name}_tmp"
+ dtype_backend = get_dtype_backend(self.native.dtype, self._implementation)
+ dtype = (
+ narwhals_to_native_dtype(
+ return_dtype,
+ dtype_backend,
+ self._implementation,
+ self._backend_version,
+ self._version,
+ )
+ if return_dtype
+ else None
+ )
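+ # build an `old -> new` lookup table and left-merge it in; unmatched rows
+ # come back null, which the check below uses to detect unreplaced values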
+ namespace = self.__native_namespace__()
+ other = namespace.DataFrame(
+ {self.name: old, tmp_name: namespace.Series(new, dtype=dtype)}
+ )
+ result = self._with_native(
+ self.native.to_frame().merge(other, on=self.name, how="left")[tmp_name]
+ ).alias(self.name)
+ if result.is_null().sum() != self.is_null().sum():
+ msg = (
+ "replace_strict did not replace all non-null values.\n\n"
+ f"The following did not get replaced: {self.filter(~self.is_null() & result.is_null()).unique(maintain_order=False).to_list()}"
+ )
+ raise ValueError(msg)
+ return result
+
+ def sort(self, *, descending: bool, nulls_last: bool) -> PandasLikeSeries:
+ na_position = "last" if nulls_last else "first"
+ return self._with_native(
+ self.native.sort_values(ascending=not descending, na_position=na_position)
+ ).alias(self.name)
+
+ def alias(self, name: str | Hashable) -> Self:
+ if name != self.name:
+ return self._with_native(
+ rename(
+ self.native,
+ name,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ ),
+ preserve_broadcast=True,
+ )
+ return self
+
+ def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray:
+ # pandas used to always return object dtype for nullable dtypes.
+ # So, we intercept __array__ and pass to `to_numpy` ourselves to make
+ # sure an appropriate numpy dtype is returned.
+ return self.to_numpy(dtype=dtype, copy=copy)
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
+ # numpy's `__array__` protocol defaults `copy` to None, but pandas' `to_numpy`
+ # expects a bool, so we coerce it below.
+ # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.__array__.html
+ dtypes = self._version.dtypes
+ if isinstance(self.dtype, dtypes.Datetime) and self.dtype.time_zone is not None:
+ s = self.dt.convert_time_zone("UTC").dt.replace_time_zone(None).native
+ else:
+ s = self.native
+
+ has_missing = s.isna().any()
+ kwargs: dict[Any, Any] = {"copy": copy or self._implementation.is_cudf()}
+ if has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
+ if self._implementation is Implementation.PANDAS and self._backend_version < (
+ 1,
+ ): # pragma: no cover
+ ...
+ else:
+ kwargs.update({"na_value": float("nan")})
+ dtype = dtype or PANDAS_TO_NUMPY_DTYPE_MISSING[str(s.dtype)]
+ if not has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_NO_MISSING:
+ dtype = dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)]
+ return s.to_numpy(dtype=dtype, **kwargs)
+
+ def to_pandas(self) -> pd.Series[Any]:
+ if self._implementation is Implementation.PANDAS:
+ return self.native
+ elif self._implementation is Implementation.CUDF: # pragma: no cover
+ return self.native.to_pandas()
+ elif self._implementation is Implementation.MODIN:
+ return self.native._to_pandas()
+ msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def to_polars(self) -> pl.Series:
+ import polars as pl # ignore-banned-import
+
+ return pl.from_pandas(self.to_pandas())
+
+ # --- descriptive ---
+ def is_unique(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep=False)).alias(self.name)
+
+ def null_count(self) -> int:
+ return self.native.isna().sum()
+
+ def is_first_distinct(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep="first")).alias(self.name)
+
+ def is_last_distinct(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep="last")).alias(self.name)
+
+ def is_sorted(self, *, descending: bool) -> bool:
+ if not isinstance(descending, bool):
+ msg = f"argument 'descending' should be boolean, found {type(descending)}"
+ raise TypeError(msg)
+
+ if descending:
+ return self.native.is_monotonic_decreasing
+ else:
+ return self.native.is_monotonic_increasing
+
+ def value_counts(
+ self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
+ ) -> PandasLikeDataFrame:
+ """Parallel is unused, exists for compatibility."""
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ index_name_ = "index" if self._name is None else self._name
+ value_name_ = name or ("proportion" if normalize else "count")
+ val_count = self.native.value_counts(
+ dropna=False, sort=False, normalize=normalize
+ ).reset_index()
+
+ val_count.columns = [index_name_, value_name_]
+
+ if sort:
+ val_count = val_count.sort_values(value_name_, ascending=False)
+
+ return PandasLikeDataFrame.from_native(val_count, context=self)
+
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> float:
+ return self.native.quantile(q=quantile, interpolation=interpolation)
+
+ def zip_with(self, mask: Any, other: Any) -> PandasLikeSeries:
+ ser = self.native
+ _, mask = align_and_extract_native(self, mask)
+ _, other = align_and_extract_native(self, other)
+ res = ser.where(mask, other)
+ return self._with_native(res)
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n))
+
+ def tail(self, n: int) -> Self:
+ return self._with_native(self.native.tail(n))
+
+ def round(self, decimals: int) -> Self:
+ return self._with_native(self.native.round(decimals=decimals))
+
+ def to_dummies(self, *, separator: str, drop_first: bool) -> PandasLikeDataFrame:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ plx = self.__native_namespace__()
+ series = self.native
+ name = str(self._name) if self._name else ""
+
+ null_col_pl = f"{name}{separator}null"
+
+ has_nulls = series.isna().any()
+ result = plx.get_dummies(
+ series,
+ prefix=name,
+ prefix_sep=separator,
+ drop_first=drop_first,
+ # Adds a null column at the end, depending on whether or not there are any.
+ dummy_na=has_nulls,
+ dtype="int8",
+ )
+ if has_nulls:
+ *cols, null_col_pd = list(result.columns)
+ output_order = [null_col_pd, *cols]
+ result = rename(
+ select_columns_by_name(
+ result, output_order, self._backend_version, self._implementation
+ ),
+ columns={null_col_pd: null_col_pl},
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ return PandasLikeDataFrame.from_native(result, context=self)
+
+ def gather_every(self, n: int, offset: int) -> Self:
+ return self._with_native(self.native.iloc[offset::n])
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self:
+ _, lower = (
+ align_and_extract_native(self, lower_bound) if lower_bound else (None, None)
+ )
+ _, upper = (
+ align_and_extract_native(self, upper_bound) if upper_bound else (None, None)
+ )
+ kwargs = {"axis": 0} if self._implementation is Implementation.MODIN else {}
+ return self._with_native(self.native.clip(lower, upper, **kwargs))
+
+ def to_arrow(self) -> pa.Array[Any]:
+ if self._implementation is Implementation.CUDF:
+ return self.native.to_arrow()
+
+ import pyarrow as pa # ignore-banned-import()
+
+ return pa.Array.from_pandas(self.native)
+
+ def mode(self) -> Self:
+ result = self.native.mode()
+ result.name = self.name
+ return self._with_native(result)
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ not_na_series = ~self.native.isna()
+ result = (
+ not_na_series.cumsum()
+ if not reverse
+ else len(self) - not_na_series.cumsum() + not_na_series - 1
+ )
+ return self._with_native(result)
+
+ def cum_min(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cummin(skipna=True)
+ if not reverse
+ else self.native[::-1].cummin(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def cum_max(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cummax(skipna=True)
+ if not reverse
+ else self.native[::-1].cummax(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def cum_prod(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cumprod(skipna=True)
+ if not reverse
+ else self.native[::-1].cumprod(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).sum()
+ return self._with_native(result)
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).mean()
+ return self._with_native(result)
+
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).var(ddof=ddof)
+ return self._with_native(result)
+
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).std(ddof=ddof)
+ return self._with_native(result)
+
+ def __iter__(self) -> Iterator[Any]:
+ yield from self.native.__iter__()
+
+ def __contains__(self, other: Any) -> bool:
+ return self.native.isna().any() if other is None else (self.native == other).any()
+
+ def is_finite(self) -> Self:
+ s = self.native
+ return self._with_native((s > float("-inf")) & (s < float("inf")))
+
+ def rank(self, method: RankMethod, *, descending: bool) -> Self:
+ pd_method = "first" if method == "ordinal" else method
+ name = self.name
+ if (
+ self._implementation is Implementation.PANDAS
+ and self._backend_version < (3,)
+ and self.dtype.is_integer()
+ and (null_mask := self.native.isna()).any()
+ ):
+ # crazy workaround for the case of `na_option="keep"` and nullable
+ # integer dtypes. This should be supported in pandas > 3.0
+ # https://github.com/pandas-dev/pandas/issues/56976
+ ranked_series = (
+ self.native.to_frame()
+ .assign(**{f"{name}_is_null": null_mask})
+ .groupby(f"{name}_is_null")
+ .rank(
+ method=pd_method,
+ na_option="keep",
+ ascending=not descending,
+ pct=False,
+ )[name]
+ )
+ else:
+ ranked_series = self.native.rank(
+ method=pd_method, na_option="keep", ascending=not descending, pct=False
+ )
+ return self._with_native(ranked_series)
+
+ def hist( # noqa: C901, PLR0912
+ self,
+ bins: list[float | int] | None,
+ *,
+ bin_count: int | None,
+ include_breakpoint: bool,
+ ) -> PandasLikeDataFrame:
+ from numpy import linspace, zeros
+
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ ns = self.__native_namespace__()
+ data: dict[str, Sequence[int | float | str] | _AnyDArray]
+
+ if bin_count == 0 or (bins is not None and len(bins) <= 1):
+ data = {}
+ if include_breakpoint:
+ data["breakpoint"] = []
+ data["count"] = []
+ return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
+
+ if self.native.count() < 1:
+ if bins is not None:
+ data = {"breakpoint": bins[1:], "count": zeros(shape=len(bins) - 1)}
+ else:
+ count = cast("int", bin_count)
+ if bin_count == 1:
+ data = {"breakpoint": [1.0], "count": [0]}
+ else:
+ data = {
+ "breakpoint": linspace(0, 1, count + 1)[1:],
+ "count": zeros(shape=count),
+ }
+ if not include_breakpoint:
+ del data["breakpoint"]
+ return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
+
+ if bin_count is not None:
+ # use Polars binning behavior
+ lower, upper = self.native.min(), self.native.max()
+ if lower == upper:
+ lower -= 0.5
+ upper += 0.5
+
+ if bin_count == 1:
+ data = {"breakpoint": [upper], "count": [self.native.count()]}
+ if not include_breakpoint:
+ del data["breakpoint"]
+ return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
+
+ bins = linspace(lower, upper, bin_count + 1)
+ bin_count = None
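+ # from here on, the computed edges are treated exactly like user-supplied bins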
+
+ # pandas (2.2.*) .value_counts(bins=int) adjusts the lowest bin twice, resulting in improper counts.
+ # pandas (2.2.*) .value_counts(bins=[...]) also adjusts the lowest bin, which should not happen since
+ # the bins were explicitly passed in.
+ categories = ns.cut(
+ self.native,
+ bins=bins if bin_count is None else bin_count,
+ include_lowest=True, # Polars 1.27.0 always includes the lowest bin
+ )
+ # modin (0.32.0) .value_counts(...) silently drops bins with empty observations, .reindex
+ # is necessary to restore these bins.
+ result = categories.value_counts(dropna=True, sort=False).reindex(
+ categories.cat.categories, fill_value=0
+ )
+ data = {}
+ if include_breakpoint:
+ data["breakpoint"] = bins[1:] if bins is not None else result.index.right
+ data["count"] = result.reset_index(drop=True)
+ return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
+
+ def log(self, base: float) -> Self:
+ native = self.native
+ implementation = self._implementation
+
+ dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)
+
+ if implementation.is_cudf():
+ import cupy as cp # ignore-banned-import # cuDF dependency.
+
+ native = self.native
+ log_arr = cp.log(native) / cp.log(base)
+ result_native = type(native)(log_arr, index=native.index, name=native.name)
+ return self._with_native(result_native)
+
+ if dtype_backend == "pyarrow":
+ import pyarrow.compute as pc
+
+ from narwhals._arrow.utils import native_to_narwhals_dtype
+
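+ # operate on the underlying Arrow ChunkedArray so the result keeps a
+ # pyarrow-backed dtype instead of densifying to numpy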
+ ca = native.array._pa_array
+ result_arr = cast("ChunkedArrayAny", pc.logb(ca, base))
+ nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
+ out_dtype = narwhals_to_native_dtype(
+ nw_dtype,
+ "pyarrow",
+ self._implementation,
+ self._backend_version,
+ self._version,
+ )
+ result_native = native.__class__(
+ result_arr, dtype=out_dtype, index=native.index, name=native.name
+ )
+ else:
+ result_native = np.log(native) / np.log(base)
+ return self._with_native(result_native)
+
+ def exp(self) -> Self:
+ native = self.native
+ implementation = self._implementation
+
+ dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)
+
+ if implementation.is_cudf():
+ import cupy as cp # ignore-banned-import # cuDF dependency.
+
+ native = self.native
+ exp_arr = cp.exp(native)
+ result_native = type(native)(exp_arr, index=native.index, name=native.name)
+ return self._with_native(result_native)
+
+ if dtype_backend == "pyarrow":
+ import pyarrow.compute as pc
+
+ from narwhals._arrow.utils import native_to_narwhals_dtype
+
+ ca = native.array._pa_array
+ result_arr = cast("ChunkedArrayAny", pc.exp(ca))
+ nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
+ out_dtype = narwhals_to_native_dtype(
+ nw_dtype,
+ "pyarrow",
+ self._implementation,
+ self._backend_version,
+ self._version,
+ )
+ result_native = native.__class__(
+ result_arr, dtype=out_dtype, index=native.index, name=native.name
+ )
+ else:
+ result_native = np.exp(native)
+ return self._with_native(result_native)
+
+ @property
+ def str(self) -> PandasLikeSeriesStringNamespace:
+ return PandasLikeSeriesStringNamespace(self)
+
+ @property
+ def dt(self) -> PandasLikeSeriesDateTimeNamespace:
+ return PandasLikeSeriesDateTimeNamespace(self)
+
+ @property
+ def cat(self) -> PandasLikeSeriesCatNamespace:
+ return PandasLikeSeriesCatNamespace(self)
+
+ @property
+ def list(self) -> PandasLikeSeriesListNamespace:
+ if not hasattr(self.native, "list"):
+ msg = "Series must be of PyArrow List type to support list namespace."
+ raise TypeError(msg)
+ return PandasLikeSeriesListNamespace(self)
+
+ @property
+ def struct(self) -> PandasLikeSeriesStructNamespace:
+ if not hasattr(self.native, "struct"):
+ msg = "Series must be of PyArrow Struct type to support struct namespace."
+ raise TypeError(msg)
+ return PandasLikeSeriesStructNamespace(self)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py
new file mode 100644
index 0000000..912da70
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import CatNamespace
+from narwhals._pandas_like.utils import PandasLikeSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
+class PandasLikeSeriesCatNamespace(
+ PandasLikeSeriesNamespace, CatNamespace["PandasLikeSeries"]
+):
+ def get_categories(self) -> PandasLikeSeries:
+ s = self.native
+ return self.with_native(type(s)(s.cat.categories, name=s.name))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py
new file mode 100644
index 0000000..c8083e9
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py
@@ -0,0 +1,237 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._compliant.any_namespace import DateTimeNamespace
+from narwhals._duration import parse_interval_string
+from narwhals._pandas_like.utils import (
+ UNIT_DICT,
+ PandasLikeSeriesNamespace,
+ calculate_timestamp_date,
+ calculate_timestamp_datetime,
+ get_dtype_backend,
+ int_dtype_mapper,
+ is_pyarrow_dtype_backend,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+ from narwhals.typing import TimeUnit
+
+
+class PandasLikeSeriesDateTimeNamespace(
+ PandasLikeSeriesNamespace, DateTimeNamespace["PandasLikeSeries"]
+):
+ def date(self) -> PandasLikeSeries:
+ result = self.with_native(self.native.dt.date)
+ if str(result.dtype).lower() == "object":
+ msg = (
+ "Accessing `date` on the default pandas backend "
+ "will return a Series of type `object`."
+ "\nThis differs from polars API and will prevent `.dt` chaining. "
+ "Please switch to the `pyarrow` backend:"
+ '\ndf.convert_dtypes(dtype_backend="pyarrow")'
+ )
+ raise NotImplementedError(msg)
+ return result
+
+ def year(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.year)
+
+ def month(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.month)
+
+ def day(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.day)
+
+ def hour(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.hour)
+
+ def minute(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.minute)
+
+ def second(self) -> PandasLikeSeries:
+ return self.with_native(self.native.dt.second)
+
+ def millisecond(self) -> PandasLikeSeries:
+ return self.microsecond() // 1000
+
+ def microsecond(self) -> PandasLikeSeries:
+ if self.backend_version < (3, 0, 0) and self._is_pyarrow():
+ # crazy workaround for https://github.com/pandas-dev/pandas/issues/59154
+ import pyarrow.compute as pc # ignore-banned-import()
+
+ from narwhals._arrow.utils import lit
+
+ arr_ns = self.native.array
+ arr = arr_ns.__arrow_array__()
+ result_arr = pc.add(
+ pc.multiply(pc.millisecond(arr), lit(1_000)), pc.microsecond(arr)
+ )
+ result = type(self.native)(type(arr_ns)(result_arr), name=self.native.name)
+ return self.with_native(result)
+
+ return self.with_native(self.native.dt.microsecond)
+
+ def nanosecond(self) -> PandasLikeSeries:
+ return self.microsecond() * 1_000 + self.native.dt.nanosecond
+
+ def ordinal_day(self) -> PandasLikeSeries:
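+ # day-of-year = (date - Jan 1 of the same year) in days, plus 1; Jan 1 is
+ # obtained by casting the year number to numpy's `datetime64[Y]`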
+ year_start = self.native.dt.year
+ result = (
+ self.native.to_numpy().astype("datetime64[D]")
+ - (year_start.to_numpy() - 1970).astype("datetime64[Y]")
+ ).astype("int32") + 1
+ dtype = "Int64[pyarrow]" if self._is_pyarrow() else "int32"
+ return self.with_native(
+ type(self.native)(result, dtype=dtype, name=year_start.name)
+ )
+
+ def weekday(self) -> PandasLikeSeries:
+ # Pandas is 0-6 while Polars is 1-7
+ return self.with_native(self.native.dt.weekday) + 1
+
+ def _is_pyarrow(self) -> bool:
+ return is_pyarrow_dtype_backend(self.native.dtype, self.implementation)
+
+ def _get_total_seconds(self) -> Any:
+ if hasattr(self.native.dt, "total_seconds"):
+ return self.native.dt.total_seconds()
+ else: # pragma: no cover
+ return (
+ self.native.dt.days * 86400
+ + self.native.dt.seconds
+ + (self.native.dt.microseconds / 1e6)
+ + (self.native.dt.nanoseconds / 1e9)
+ )
+
+ def total_minutes(self) -> PandasLikeSeries:
+ s = self._get_total_seconds()
+ # this calculates the sign of each series element
+ s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
+ s_abs = s.abs() // 60
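+ # flooring the absolute value and re-applying the sign truncates toward zero
+ # (plain `//` on negative values would round toward -inf instead)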
+ if ~s.isna().any():
+ s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
+ return self.with_native(s_abs * s_sign)
+
+ def total_seconds(self) -> PandasLikeSeries:
+ s = self._get_total_seconds()
+ # this calculates the sign of each series element
+ s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
+ s_abs = s.abs() // 1
+ if ~s.isna().any():
+ s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
+ return self.with_native(s_abs * s_sign)
+
+ def total_milliseconds(self) -> PandasLikeSeries:
+ s = self._get_total_seconds() * 1e3
+ # this calculates the sign of each series element
+ s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
+ s_abs = s.abs() // 1
+ if ~s.isna().any():
+ s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
+ return self.with_native(s_abs * s_sign)
+
+ def total_microseconds(self) -> PandasLikeSeries:
+ s = self._get_total_seconds() * 1e6
+ # this calculates the sign of each series element
+ s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
+ s_abs = s.abs() // 1
+ if ~s.isna().any():
+ s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
+ return self.with_native(s_abs * s_sign)
+
+ def total_nanoseconds(self) -> PandasLikeSeries:
+ s = self._get_total_seconds() * 1e9
+ # this calculates the sign of each series element
+ s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
+ s_abs = s.abs() // 1
+ if ~s.isna().any():
+ s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
+ return self.with_native(s_abs * s_sign)
+
+ def to_string(self, format: str) -> PandasLikeSeries:
+ # Polars' parser treats `'%.f'` as pandas does `'.%f'`
+ # PyArrow interprets `'%S'` as "seconds, plus fractional seconds"
+ # and doesn't support `%f`
+ if not self._is_pyarrow():
+ format = format.replace("%S%.f", "%S.%f")
+ else:
+ format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
+ return self.with_native(self.native.dt.strftime(format))
+
+ def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
+ de_zone = self.native.dt.tz_localize(None)
+ result = de_zone.dt.tz_localize(time_zone) if time_zone is not None else de_zone
+ return self.with_native(result)
+
+ def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
+ if self.compliant.dtype.time_zone is None: # type: ignore[attr-defined]
+ result = self.native.dt.tz_localize("UTC").dt.tz_convert(time_zone)
+ else:
+ result = self.native.dt.tz_convert(time_zone)
+ return self.with_native(result)
+
+ def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries:
+ s = self.native
+ dtype = self.compliant.dtype
+ mask_na = s.isna()
+ dtypes = self.version.dtypes
+ if dtype == dtypes.Date:
+ # Date is only supported in pandas dtypes if pyarrow-backed
+ s_cast = s.astype("Int32[pyarrow]")
+ result = calculate_timestamp_date(s_cast, time_unit)
+ elif isinstance(dtype, dtypes.Datetime):
+ fn = (
+ s.view
+ if (self.implementation.is_pandas() and self.backend_version < (2,))
+ else s.astype
+ )
+ s_cast = fn("Int64[pyarrow]") if self._is_pyarrow() else fn("int64")
+ result = calculate_timestamp_datetime(s_cast, dtype.time_unit, time_unit)
+ else:
+ msg = "Input should be either of Date or Datetime type"
+ raise TypeError(msg)
+ result[mask_na] = None
+ return self.with_native(result)
+
+ def truncate(self, every: str) -> PandasLikeSeries:
+ multiple, unit = parse_interval_string(every)
+ native = self.native
+ if self.implementation.is_cudf():
+ if multiple != 1:
+ msg = f"Only multiple `1` is supported for cuDF, got: {multiple}."
+ raise NotImplementedError(msg)
+ return self.with_native(self.native.dt.floor(UNIT_DICT.get(unit, unit)))
+ dtype_backend = get_dtype_backend(native.dtype, self.compliant._implementation)
+ if unit in {"mo", "q", "y"}:
+ if dtype_backend == "pyarrow":
+ import pyarrow.compute as pc # ignore-banned-import
+
+ from narwhals._arrow.utils import UNITS_DICT
+
+ ca = native.array._pa_array
+ result_arr = pc.floor_temporal(ca, multiple, UNITS_DICT[unit])
+ else:
+ if unit == "q":
+ multiple *= 3
+ np_unit = "M"
+ elif unit == "mo":
+ np_unit = "M"
+ else:
+ np_unit = "Y"
+ arr = native.values
+ arr_dtype = arr.dtype
+ result_arr = arr.astype(f"datetime64[{multiple}{np_unit}]").astype(
+ arr_dtype
+ )
+ result_native = native.__class__(
+ result_arr, dtype=native.dtype, index=native.index, name=native.name
+ )
+ return self.with_native(result_native)
+ return self.with_native(
+ self.native.dt.floor(f"{multiple}{UNIT_DICT.get(unit, unit)}")
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py
new file mode 100644
index 0000000..7816c1b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import ListNamespace
+from narwhals._pandas_like.utils import (
+ PandasLikeSeriesNamespace,
+ get_dtype_backend,
+ narwhals_to_native_dtype,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
+class PandasLikeSeriesListNamespace(
+ PandasLikeSeriesNamespace, ListNamespace["PandasLikeSeries"]
+):
+ def len(self) -> PandasLikeSeries:
+ result = self.native.list.len()
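+ # Editorial example (assumes a pyarrow-backed list column):
+ # [[1, 2], [9]] -> `.list.len()` of [2, 1].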
+ implementation = self.implementation
+ backend_version = self.backend_version
+ if implementation.is_pandas() and backend_version < (3, 0): # pragma: no cover
+ # `result` is a new object so it's safe to do this inplace.
+ result.index = self.native.index
+ dtype = narwhals_to_native_dtype(
+ self.version.dtypes.UInt32(),
+ get_dtype_backend(result.dtype, implementation),
+ implementation,
+ backend_version,
+ self.version,
+ )
+ return self.with_native(result.astype(dtype)).alias(self.native.name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py
new file mode 100644
index 0000000..c4bef09
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._compliant.any_namespace import StringNamespace
+from narwhals._pandas_like.utils import (
+ PandasLikeSeriesNamespace,
+ is_pyarrow_dtype_backend,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
+class PandasLikeSeriesStringNamespace(
+ PandasLikeSeriesNamespace, StringNamespace["PandasLikeSeries"]
+):
+ def len_chars(self) -> PandasLikeSeries:
+ return self.with_native(self.native.str.len())
+
+ def replace(
+ self, pattern: str, value: str, *, literal: bool, n: int
+ ) -> PandasLikeSeries:
+ return self.with_native(
+ self.native.str.replace(pat=pattern, repl=value, n=n, regex=not literal)
+ )
+
+ def replace_all(self, pattern: str, value: str, *, literal: bool) -> PandasLikeSeries:
+ return self.replace(pattern, value, literal=literal, n=-1)
+
+ def strip_chars(self, characters: str | None) -> PandasLikeSeries:
+ return self.with_native(self.native.str.strip(characters))
+
+ def starts_with(self, prefix: str) -> PandasLikeSeries:
+ return self.with_native(self.native.str.startswith(prefix))
+
+ def ends_with(self, suffix: str) -> PandasLikeSeries:
+ return self.with_native(self.native.str.endswith(suffix))
+
+ def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries:
+ return self.with_native(self.native.str.contains(pat=pattern, regex=not literal))
+
+ def slice(self, offset: int, length: int | None) -> PandasLikeSeries:
+ stop = offset + length if length else None
+ return self.with_native(self.native.str.slice(start=offset, stop=stop))
+
+ def split(self, by: str) -> PandasLikeSeries:
+ implementation = self.implementation
+ if not implementation.is_cudf() and not is_pyarrow_dtype_backend(
+ self.native.dtype, implementation
+ ):
+ msg = (
+ "This operation requires a pyarrow-backed series. "
+ "Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes "
+ "and ensure you are using dtype_backend='pyarrow'. "
+ "Additionally, make sure you have pandas version 1.5+ and pyarrow installed. "
+ )
+ raise TypeError(msg)
+ return self.with_native(self.native.str.split(pat=by))
+
+ def to_datetime(self, format: str | None) -> PandasLikeSeries:
+ # If we know inputs are timezone-aware, we can pass `utc=True` for better performance.
+ if format and any(x in format for x in ("%z", "Z")):
+ return self.with_native(self._to_datetime(format, utc=True))
+ result = self.with_native(self._to_datetime(format, utc=False))
+ if (tz := getattr(result.dtype, "time_zone", None)) and tz != "UTC":
+ return result.dt.convert_time_zone("UTC")
+ return result
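+ # Editorial note (plain-pandas assumption): with `utc=False`, strings with
+ # mixed UTC offsets parse to object dtype (or raise in newer pandas), so
+ # tz-aware formats (`%z` / `Z`) take the `utc=True` path above, and any
+ # remaining non-UTC tz-aware result is normalised to UTC afterwards.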
+
+ def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
+ return self.implementation.to_native_namespace().to_datetime(
+ self.native, format=format, utc=utc
+ )
+
+ def to_uppercase(self) -> PandasLikeSeries:
+ return self.with_native(self.native.str.upper())
+
+ def to_lowercase(self) -> PandasLikeSeries:
+ return self.with_native(self.native.str.lower())
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py
new file mode 100644
index 0000000..dc80997
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import StructNamespace
+from narwhals._pandas_like.utils import PandasLikeSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
+class PandasLikeSeriesStructNamespace(
+ PandasLikeSeriesNamespace, StructNamespace["PandasLikeSeries"]
+):
+ def field(self, name: str) -> PandasLikeSeries:
+ return self.with_native(self.native.struct.field(name)).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py
new file mode 100644
index 0000000..6f7bcb2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py
@@ -0,0 +1,15 @@
+from __future__ import annotations # pragma: no cover
+
+from typing import TYPE_CHECKING # pragma: no cover
+
+if TYPE_CHECKING:
+ from typing import Any, TypeVar
+
+ import pandas as pd
+ from typing_extensions import TypeAlias
+
+ from narwhals._pandas_like.expr import PandasLikeExpr
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+ IntoPandasLikeExpr: TypeAlias = "PandasLikeExpr | PandasLikeSeries"
+ NDFrameT = TypeVar("NDFrameT", "pd.DataFrame", "pd.Series[Any]")
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py
new file mode 100644
index 0000000..bc75c14
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py
@@ -0,0 +1,673 @@
+from __future__ import annotations
+
+import functools
+import re
+from contextlib import suppress
+from typing import TYPE_CHECKING, Any, Callable, Literal, Sized, TypeVar
+
+import pandas as pd
+
+from narwhals._compliant.series import EagerSeriesNamespace
+from narwhals._utils import (
+ Implementation,
+ Version,
+ _DeferredIterable,
+ check_columns_exist,
+ isinstance_or_issubclass,
+)
+from narwhals.exceptions import DuplicateError, ShapeError
+
+T = TypeVar("T", bound=Sized)
+
+if TYPE_CHECKING:
+ from pandas._typing import Dtype as PandasDtype
+
+ from narwhals._pandas_like.expr import PandasLikeExpr
+ from narwhals._pandas_like.series import PandasLikeSeries
+ from narwhals.dtypes import DType
+ from narwhals.typing import DTypeBackend, IntoDType, TimeUnit, _1DArray
+
+ ExprT = TypeVar("ExprT", bound=PandasLikeExpr)
+
+
+PANDAS_LIKE_IMPLEMENTATION = {
+ Implementation.PANDAS,
+ Implementation.CUDF,
+ Implementation.MODIN,
+}
+PD_DATETIME_RGX = r"""^
+ datetime64\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ (?:, # Begin non-capturing group for optional timezone
+ \s* # Optional whitespace after comma
+ (?P<time_zone> # Start named group for timezone
+ [a-zA-Z\/]+ # Match timezone name, e.g., UTC, America/New_York
+ (?:[+-]\d{2}:\d{2})? # Optional offset in format +HH:MM or -HH:MM
+ | # OR
+ pytz\.FixedOffset\(\d+\) # Match pytz.FixedOffset with integer offset in parentheses
+ ) # End time_zone group
+ )? # End optional timezone group
+ \] # Closing bracket for datetime64
+$"""
+PATTERN_PD_DATETIME = re.compile(PD_DATETIME_RGX, re.VERBOSE)
+PA_DATETIME_RGX = r"""^
+ timestamp\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ (?:, # Begin non-capturing group for optional timezone
+ \s?tz= # Match "tz=" prefix
+ (?P<time_zone> # Start named group for timezone
+ [a-zA-Z\/]* # Match timezone name (e.g., UTC, America/New_York)
+ (?: # Begin optional non-capturing group for offset
+ [+-]\d{2}:\d{2} # Match offset in format +HH:MM or -HH:MM
+ )? # End optional offset group
+ ) # End time_zone group
+ )? # End optional timezone group
+ \] # Closing bracket for timestamp
+ \[pyarrow\] # Literal string "[pyarrow]"
+$"""
+PATTERN_PA_DATETIME = re.compile(PA_DATETIME_RGX, re.VERBOSE)
+PD_DURATION_RGX = r"""^
+ timedelta64\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ \] # Closing bracket for timedelta64
+$"""
+
+PATTERN_PD_DURATION = re.compile(PD_DURATION_RGX, re.VERBOSE)
+PA_DURATION_RGX = r"""^
+ duration\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ \] # Closing bracket for duration
+ \[pyarrow\] # Literal string "[pyarrow]"
+$"""
+PATTERN_PA_DURATION = re.compile(PA_DURATION_RGX, re.VERBOSE)
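+
+# Editorial examples of strings the patterns above match:
+# PATTERN_PD_DATETIME: "datetime64[ns]", "datetime64[us, Europe/Berlin]"
+# PATTERN_PA_DATETIME: "timestamp[ms, tz=UTC][pyarrow]"
+# PATTERN_PD_DURATION: "timedelta64[ns]"
+# PATTERN_PA_DURATION: "duration[us][pyarrow]"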
+
+UNIT_DICT = {"d": "D", "m": "min"}
+
+
+def align_and_extract_native(
+ lhs: PandasLikeSeries, rhs: PandasLikeSeries | object
+) -> tuple[pd.Series[Any] | object, pd.Series[Any] | object]:
+ """Validate RHS of binary operation.
+
+ If the comparison isn't supported, return `NotImplemented` so that the
+ "right-hand-side" operation (e.g. `__radd__`) can be tried.
+ """
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+ lhs_index = lhs.native.index
+
+ if isinstance(rhs, PandasLikeDataFrame):
+ return NotImplemented
+
+ if lhs._broadcast and isinstance(rhs, PandasLikeSeries) and not rhs._broadcast:
+ return lhs.native.iloc[0], rhs.native
+
+ if isinstance(rhs, PandasLikeSeries):
+ if rhs._broadcast:
+ return (lhs.native, rhs.native.iloc[0])
+ if rhs.native.index is not lhs_index:
+ return (
+ lhs.native,
+ set_index(
+ rhs.native,
+ lhs_index,
+ implementation=rhs._implementation,
+ backend_version=rhs._backend_version,
+ ),
+ )
+ return (lhs.native, rhs.native)
+
+ if isinstance(rhs, list):
+ msg = "Expected Series or scalar, got list."
+ raise TypeError(msg)
+ # `rhs` must be scalar, so just leave it as-is
+ return lhs.native, rhs
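+
+# Editorial sketch of the broadcast fast path above: if `lhs` is a length-1
+# broadcast series (e.g. the result of `.mean()`) and `rhs` is a full column,
+# the pair returned is `(lhs.native.iloc[0], rhs.native)`, so the binary op
+# runs as scalar-versus-Series without materialising a full-length `lhs`.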
+
+
+def set_index(
+ obj: T,
+ index: Any,
+ *,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+) -> T:
+ """Wrapper around pandas' set_axis to set object index.
+
+ We can set `copy` / `inplace` based on implementation/version.
+ """
+ if isinstance(index, implementation.to_native_namespace().Index) and (
+ expected_len := len(index)
+ ) != (actual_len := len(obj)):
+ msg = f"Expected object of length {expected_len}, got length: {actual_len}"
+ raise ShapeError(msg)
+ if implementation is Implementation.CUDF: # pragma: no cover
+ obj = obj.copy(deep=False) # type: ignore[attr-defined]
+ obj.index = index # type: ignore[attr-defined]
+ return obj
+ if implementation is Implementation.PANDAS and (
+ backend_version < (1,)
+ ): # pragma: no cover
+ kwargs = {"inplace": False}
+ else:
+ kwargs = {}
+ if implementation is Implementation.PANDAS and (
+ (1, 5) <= backend_version < (3,)
+ ): # pragma: no cover
+ kwargs["copy"] = False
+ else: # pragma: no cover
+ pass
+ return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined]
+
+
+def rename(
+ obj: T,
+ *args: Any,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ **kwargs: Any,
+) -> T:
+ """Wrapper around pandas' rename so that we can set `copy` based on implementation/version."""
+ if implementation is Implementation.PANDAS and (
+ backend_version >= (3,)
+ ): # pragma: no cover
+ return obj.rename(*args, **kwargs) # type: ignore[attr-defined]
+ return obj.rename(*args, **kwargs, copy=False) # type: ignore[attr-defined]
+
+
+@functools.lru_cache(maxsize=16)
+def non_object_native_to_narwhals_dtype(native_dtype: Any, version: Version) -> DType: # noqa: C901, PLR0912
+ dtype = str(native_dtype)
+
+ dtypes = version.dtypes
+ if dtype in {"int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"}:
+ return dtypes.Int64()
+ if dtype in {"int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"}:
+ return dtypes.Int32()
+ if dtype in {"int16", "Int16", "Int16[pyarrow]", "int16[pyarrow]"}:
+ return dtypes.Int16()
+ if dtype in {"int8", "Int8", "Int8[pyarrow]", "int8[pyarrow]"}:
+ return dtypes.Int8()
+ if dtype in {"uint64", "UInt64", "UInt64[pyarrow]", "uint64[pyarrow]"}:
+ return dtypes.UInt64()
+ if dtype in {"uint32", "UInt32", "UInt32[pyarrow]", "uint32[pyarrow]"}:
+ return dtypes.UInt32()
+ if dtype in {"uint16", "UInt16", "UInt16[pyarrow]", "uint16[pyarrow]"}:
+ return dtypes.UInt16()
+ if dtype in {"uint8", "UInt8", "UInt8[pyarrow]", "uint8[pyarrow]"}:
+ return dtypes.UInt8()
+ if dtype in {
+ "float64",
+ "Float64",
+ "Float64[pyarrow]",
+ "float64[pyarrow]",
+ "double[pyarrow]",
+ }:
+ return dtypes.Float64()
+ if dtype in {
+ "float32",
+ "Float32",
+ "Float32[pyarrow]",
+ "float32[pyarrow]",
+ "float[pyarrow]",
+ }:
+ return dtypes.Float32()
+ if dtype in {"string", "string[python]", "string[pyarrow]", "large_string[pyarrow]"}:
+ return dtypes.String()
+ if dtype in {"bool", "boolean", "boolean[pyarrow]", "bool[pyarrow]"}:
+ return dtypes.Boolean()
+ if dtype.startswith("dictionary<"):
+ return dtypes.Categorical()
+ if dtype == "category":
+ return native_categorical_to_narwhals_dtype(native_dtype, version)
+ if (match_ := PATTERN_PD_DATETIME.match(dtype)) or (
+ match_ := PATTERN_PA_DATETIME.match(dtype)
+ ):
+ dt_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
+ dt_time_zone: str | None = match_.group("time_zone")
+ return dtypes.Datetime(dt_time_unit, dt_time_zone)
+ if (match_ := PATTERN_PD_DURATION.match(dtype)) or (
+ match_ := PATTERN_PA_DURATION.match(dtype)
+ ):
+ du_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
+ return dtypes.Duration(du_time_unit)
+ if dtype == "date32[day][pyarrow]":
+ return dtypes.Date()
+ if dtype.startswith("decimal") and dtype.endswith("[pyarrow]"):
+ return dtypes.Decimal()
+ if dtype.startswith("time") and dtype.endswith("[pyarrow]"):
+ return dtypes.Time()
+ if dtype.startswith("binary") and dtype.endswith("[pyarrow]"):
+ return dtypes.Binary()
+ return dtypes.Unknown() # pragma: no cover
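+
+# Editorial examples: "int64", "Int64" and "int64[pyarrow]" all map to
+# dtypes.Int64(); "datetime64[ns, UTC]" maps to dtypes.Datetime("ns", "UTC").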
+
+
+def object_native_to_narwhals_dtype(
+ series: PandasLikeSeries, version: Version, implementation: Implementation
+) -> DType:
+ dtypes = version.dtypes
+ if implementation is Implementation.CUDF: # pragma: no cover
+ # Per conversations with their maintainers, they don't support arbitrary
+ # objects, so we can just return String.
+ return dtypes.String()
+
+ # Arbitrary limit of 100 elements used to sniff the dtype.
+ inferred_dtype = pd.api.types.infer_dtype(series.head(100), skipna=True)
+ if inferred_dtype == "string":
+ return dtypes.String()
+ if inferred_dtype == "empty" and version is not Version.V1:
+ # Default to String for empty Series.
+ return dtypes.String()
+ elif inferred_dtype == "empty":
+ # But preserve returning Object in V1.
+ return dtypes.Object()
+ return dtypes.Object()
+
+
+def native_categorical_to_narwhals_dtype(
+ native_dtype: pd.CategoricalDtype,
+ version: Version,
+ implementation: Literal[Implementation.CUDF] | None = None,
+) -> DType:
+ dtypes = version.dtypes
+ if version is Version.V1:
+ return dtypes.Categorical()
+ if native_dtype.ordered:
+ into_iter = (
+ _cudf_categorical_to_list(native_dtype)
+ if implementation is Implementation.CUDF
+ else native_dtype.categories.to_list
+ )
+ return dtypes.Enum(_DeferredIterable(into_iter))
+ return dtypes.Categorical()
+
+
+def _cudf_categorical_to_list(
+ native_dtype: Any,
+) -> Callable[[], list[Any]]: # pragma: no cover
+ # NOTE: https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/api/cudf.core.dtypes.categoricaldtype/#cudf.core.dtypes.CategoricalDtype
+ def fn() -> list[Any]:
+ return native_dtype.categories.to_arrow().to_pylist()
+
+ return fn
+
+
+def native_to_narwhals_dtype(
+ native_dtype: Any, version: Version, implementation: Implementation
+) -> DType:
+ str_dtype = str(native_dtype)
+
+ if str_dtype.startswith(("large_list", "list", "struct", "fixed_size_list")):
+ from narwhals._arrow.utils import (
+ native_to_narwhals_dtype as arrow_native_to_narwhals_dtype,
+ )
+
+ if hasattr(native_dtype, "to_arrow"): # pragma: no cover
+ # cudf, cudf.pandas
+ return arrow_native_to_narwhals_dtype(native_dtype.to_arrow(), version)
+ return arrow_native_to_narwhals_dtype(native_dtype.pyarrow_dtype, version)
+ if str_dtype == "category" and implementation.is_cudf():
+ # https://github.com/rapidsai/cudf/issues/18536
+ # https://github.com/rapidsai/cudf/issues/14027
+ return native_categorical_to_narwhals_dtype(
+ native_dtype, version, Implementation.CUDF
+ )
+ if str_dtype != "object":
+ return non_object_native_to_narwhals_dtype(native_dtype, version)
+ elif implementation is Implementation.DASK:
+ # Per conversations with their maintainers, they don't support arbitrary
+ # objects, so we can just return String.
+ return version.dtypes.String()
+ msg = (
+ "Unreachable code, object dtype should be handled separately" # pragma: no cover
+ )
+ raise AssertionError(msg)
+
+
+def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBackend:
+ """Get dtype backend for pandas type.
+
+ Matches pandas' `dtype_backend` argument in `convert_dtypes`.
+ """
+ if implementation is Implementation.CUDF:
+ return None
+ if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype):
+ return "pyarrow"
+ with suppress(AttributeError):
+ sentinel = object()
+ if (
+ isinstance(dtype, pd.api.extensions.ExtensionDtype)
+ and getattr(dtype, "base", sentinel) is None
+ ):
+ return "numpy_nullable"
+ return None
+
+
+@functools.lru_cache(maxsize=16)
+def is_pyarrow_dtype_backend(dtype: Any, implementation: Implementation) -> bool:
+ return get_dtype_backend(dtype, implementation) == "pyarrow"
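+
+# Editorial examples (assuming pandas>=1.5 with pyarrow installed):
+# get_dtype_backend(pd.ArrowDtype(pa.int64()), Implementation.PANDAS) -> "pyarrow"
+# get_dtype_backend(pd.Int64Dtype(), Implementation.PANDAS) -> "numpy_nullable"
+# get_dtype_backend(pd.Series([1.0]).dtype, Implementation.PANDAS) -> None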
+
+
+def narwhals_to_native_dtype( # noqa: C901, PLR0912, PLR0915
+ dtype: IntoDType,
+ dtype_backend: DTypeBackend,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+) -> str | PandasDtype:
+ if dtype_backend is not None and dtype_backend not in {"pyarrow", "numpy_nullable"}:
+ msg = f"Expected one of {{None, 'pyarrow', 'numpy_nullable'}}, got: '{dtype_backend}'"
+ raise ValueError(msg)
+ dtypes = version.dtypes
+ if isinstance_or_issubclass(dtype, dtypes.Decimal):
+ msg = "Casting to Decimal is not supported yet."
+ raise NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Float64):
+ if dtype_backend == "pyarrow":
+ return "Float64[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Float64"
+ return "float64"
+ if isinstance_or_issubclass(dtype, dtypes.Float32):
+ if dtype_backend == "pyarrow":
+ return "Float32[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Float32"
+ return "float32"
+ if isinstance_or_issubclass(dtype, dtypes.Int64):
+ if dtype_backend == "pyarrow":
+ return "Int64[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Int64"
+ return "int64"
+ if isinstance_or_issubclass(dtype, dtypes.Int32):
+ if dtype_backend == "pyarrow":
+ return "Int32[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Int32"
+ return "int32"
+ if isinstance_or_issubclass(dtype, dtypes.Int16):
+ if dtype_backend == "pyarrow":
+ return "Int16[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Int16"
+ return "int16"
+ if isinstance_or_issubclass(dtype, dtypes.Int8):
+ if dtype_backend == "pyarrow":
+ return "Int8[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "Int8"
+ return "int8"
+ if isinstance_or_issubclass(dtype, dtypes.UInt64):
+ if dtype_backend == "pyarrow":
+ return "UInt64[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "UInt64"
+ return "uint64"
+ if isinstance_or_issubclass(dtype, dtypes.UInt32):
+ if dtype_backend == "pyarrow":
+ return "UInt32[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "UInt32"
+ return "uint32"
+ if isinstance_or_issubclass(dtype, dtypes.UInt16):
+ if dtype_backend == "pyarrow":
+ return "UInt16[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "UInt16"
+ return "uint16"
+ if isinstance_or_issubclass(dtype, dtypes.UInt8):
+ if dtype_backend == "pyarrow":
+ return "UInt8[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "UInt8"
+ return "uint8"
+ if isinstance_or_issubclass(dtype, dtypes.String):
+ if dtype_backend == "pyarrow":
+ return "string[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "string"
+ return str
+ if isinstance_or_issubclass(dtype, dtypes.Boolean):
+ if dtype_backend == "pyarrow":
+ return "boolean[pyarrow]"
+ elif dtype_backend == "numpy_nullable":
+ return "boolean"
+ return "bool"
+ if isinstance_or_issubclass(dtype, dtypes.Categorical):
+ # TODO(Unassigned): is there no pyarrow-backed categorical?
+ # or at least, convert_dtypes(dtype_backend='pyarrow') doesn't
+ # convert to it?
+ return "category"
+ if isinstance_or_issubclass(dtype, dtypes.Datetime):
+ # Pandas does not support "ms" or "us" time units before version 2.0
+ if implementation is Implementation.PANDAS and backend_version < (
+ 2,
+ ): # pragma: no cover
+ dt_time_unit = "ns"
+ else:
+ dt_time_unit = dtype.time_unit
+
+ if dtype_backend == "pyarrow":
+ tz_part = f", tz={tz}" if (tz := dtype.time_zone) else ""
+ return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]"
+ else:
+ tz_part = f", {tz}" if (tz := dtype.time_zone) else ""
+ return f"datetime64[{dt_time_unit}{tz_part}]"
+ if isinstance_or_issubclass(dtype, dtypes.Duration):
+ if implementation is Implementation.PANDAS and backend_version < (
+ 2,
+ ): # pragma: no cover
+ du_time_unit = "ns"
+ else:
+ du_time_unit = dtype.time_unit
+ return (
+ f"duration[{du_time_unit}][pyarrow]"
+ if dtype_backend == "pyarrow"
+ else f"timedelta64[{du_time_unit}]"
+ )
+ if isinstance_or_issubclass(dtype, dtypes.Date):
+ try:
+ import pyarrow as pa # ignore-banned-import # noqa: F401
+ except ModuleNotFoundError as exc: # pragma: no cover
+ msg = "'pyarrow>=11.0.0' is required for `Date` dtype."
+ raise ModuleNotFoundError(msg) from exc
+ return "date32[pyarrow]"
+ if isinstance_or_issubclass(dtype, dtypes.Enum):
+ if version is Version.V1:
+ msg = "Converting to Enum is not supported in narwhals.stable.v1"
+ raise NotImplementedError(msg)
+ if isinstance(dtype, dtypes.Enum):
+ ns = implementation.to_native_namespace()
+ return ns.CategoricalDtype(dtype.categories, ordered=True)
+ msg = "Can not cast / initialize Enum without categories present"
+ raise ValueError(msg)
+
+ if isinstance_or_issubclass(
+ dtype, (dtypes.Struct, dtypes.Array, dtypes.List, dtypes.Time, dtypes.Binary)
+ ):
+ if implementation is Implementation.PANDAS and backend_version >= (2, 2):
+ try:
+ import pyarrow as pa # ignore-banned-import # noqa: F401
+ except ImportError as exc: # pragma: no cover
+ msg = f"Unable to convert to {dtype} to to the following exception: {exc.msg}"
+ raise ImportError(msg) from exc
+ from narwhals._arrow.utils import (
+ narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
+ )
+
+ return pd.ArrowDtype(arrow_narwhals_to_native_dtype(dtype, version=version))
+ else: # pragma: no cover
+ msg = (
+ f"Converting to {dtype} dtype is not supported for implementation "
+ f"{implementation} and version {version}."
+ )
+ raise NotImplementedError(msg)
+ msg = f"Unknown dtype: {dtype}" # pragma: no cover
+ raise AssertionError(msg)
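+
+# Editorial examples of the mapping above, by `dtype_backend`:
+# narwhals_to_native_dtype(Int64(), None, ...) -> "int64"
+# narwhals_to_native_dtype(Int64(), "numpy_nullable", ...) -> "Int64"
+# narwhals_to_native_dtype(Int64(), "pyarrow", ...) -> "Int64[pyarrow]"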
+
+
+def align_series_full_broadcast(*series: PandasLikeSeries) -> list[PandasLikeSeries]:
+ # Ensure all of `series` have the same length and index. Scalars get broadcasted to
+ # the full length of the longest Series. This is useful when you need to construct a
+ # full Series anyway (e.g. `DataFrame.select`). It should not be used in binary operations,
+ # such as `nw.col('a') - nw.col('a').mean()`, because then it's more efficient to extract
+ # the right-hand-side's single element as a scalar.
+ native_namespace = series[0].__native_namespace__()
+
+ lengths = [len(s) for s in series]
+ max_length = max(lengths)
+
+ idx = series[lengths.index(max_length)].native.index
+ reindexed = []
+ for s in series:
+ if s._broadcast:
+ reindexed.append(
+ s._with_native(
+ native_namespace.Series(
+ [s.native.iloc[0]] * max_length,
+ index=idx,
+ name=s.name,
+ dtype=s.native.dtype,
+ )
+ )
+ )
+
+ elif s.native.index is not idx:
+ reindexed.append(
+ s._with_native(
+ set_index(
+ s.native,
+ idx,
+ implementation=s._implementation,
+ backend_version=s._backend_version,
+ )
+ )
+ )
+ else:
+ reindexed.append(s)
+ return reindexed
+
+
+def int_dtype_mapper(dtype: Any) -> str:
+ if "pyarrow" in str(dtype):
+ return "Int64[pyarrow]"
+ if str(dtype).lower() != str(dtype): # pragma: no cover
+ return "Int64"
+ return "int64"
+
+
+def calculate_timestamp_datetime( # noqa: C901, PLR0912
+ s: pd.Series[int], original_time_unit: str, time_unit: str
+) -> pd.Series[int]:
+ if original_time_unit == "ns":
+ if time_unit == "ns":
+ result = s
+ elif time_unit == "us":
+ result = s // 1_000
+ else:
+ result = s // 1_000_000
+ elif original_time_unit == "us":
+ if time_unit == "ns":
+ result = s * 1_000
+ elif time_unit == "us":
+ result = s
+ else:
+ result = s // 1_000
+ elif original_time_unit == "ms":
+ if time_unit == "ns":
+ result = s * 1_000_000
+ elif time_unit == "us":
+ result = s * 1_000
+ else:
+ result = s
+ elif original_time_unit == "s":
+ if time_unit == "ns":
+ result = s * 1_000_000_000
+ elif time_unit == "us":
+ result = s * 1_000_000
+ else:
+ result = s * 1_000
+ else: # pragma: no cover
+ msg = f"unexpected time unit {original_time_unit}, please report a bug at https://github.com/narwhals-dev/narwhals"
+ raise AssertionError(msg)
+ return result
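+
+# Worked example (editorial): converting 1_500 microseconds to "ms" takes the
+# `original_time_unit == "us"` / else branch: 1_500 // 1_000 == 1.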
+
+
+def calculate_timestamp_date(s: pd.Series[int], time_unit: str) -> pd.Series[int]:
+ s = s * 86_400 # number of seconds in a day
+ if time_unit == "ns":
+ result = s * 1_000_000_000
+ elif time_unit == "us":
+ result = s * 1_000_000
+ else:
+ result = s * 1_000
+ return result
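+
+# Worked example (editorial): 2020-01-01 is day 18_262 since the epoch, so for
+# time_unit "ms" the result is 18_262 * 86_400 * 1_000 == 1_577_836_800_000.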
+
+
+def select_columns_by_name(
+ df: T,
+ column_names: list[str] | _1DArray, # NOTE: Cannot be a tuple!
+ backend_version: tuple[int, ...],
+ implementation: Implementation,
+) -> T:
+ """Select columns by name.
+
+ Prefer this over `df.loc[:, column_names]` as it's
+ generally more performant.
+ """
+ if len(column_names) == df.shape[1] and all(column_names == df.columns): # type: ignore[attr-defined]
+ return df
+ if (df.columns.dtype.kind == "b") or ( # type: ignore[attr-defined]
+ implementation is Implementation.PANDAS and backend_version < (1, 5)
+ ):
+ # See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122
+ # for why we need this
+ if error := check_columns_exist(
+ column_names, # type: ignore[arg-type]
+ available=df.columns.tolist(), # type: ignore[attr-defined]
+ ):
+ raise error
+ return df.loc[:, column_names] # type: ignore[attr-defined]
+ try:
+ return df[column_names] # type: ignore[index]
+ except KeyError as e:
+ if error := check_columns_exist(
+ column_names, # type: ignore[arg-type]
+ available=df.columns.tolist(), # type: ignore[attr-defined]
+ ):
+ raise error from e
+ raise
+
+
+def check_column_names_are_unique(columns: pd.Index[str]) -> None:
+ try:
+ len_unique_columns = len(columns.drop_duplicates())
+ except Exception: # noqa: BLE001 # pragma: no cover
+ msg = f"Expected hashable (e.g. str or int) column names, got: {columns}"
+ raise ValueError(msg) from None
+
+ if len(columns) != len_unique_columns:
+ from collections import Counter
+
+ counter = Counter(columns)
+ msg = ""
+ for key, value in counter.items():
+ if value > 1:
+ msg += f"\n- '{key}' {value} times"
+ msg = f"Expected unique column names, got:{msg}"
+ raise DuplicateError(msg)
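+
+# Editorial example: an index ["a", "a", "b"] raises DuplicateError with:
+# "Expected unique column names, got:\n- 'a' 2 times"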
+
+
+class PandasLikeSeriesNamespace(EagerSeriesNamespace["PandasLikeSeries", Any]):
+ @property
+ def implementation(self) -> Implementation:
+ return self.compliant._implementation
+
+ @property
+ def backend_version(self) -> tuple[int, ...]:
+ return self.compliant._backend_version
+
+ @property
+ def version(self) -> Version:
+ return self.compliant._version