path: root/venv/lib/python3.8/site-packages/narwhals/functions.py
author      sotech117 <michael_foiani@brown.edu>    2025-07-31 17:27:24 -0400
committer   sotech117 <michael_foiani@brown.edu>    2025-07-31 17:27:24 -0400
commit      5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree        8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/narwhals/functions.py
parent      b832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/narwhals/functions.py')
-rw-r--r--    venv/lib/python3.8/site-packages/narwhals/functions.py    1793
1 file changed, 1793 insertions, 0 deletions
diff --git a/venv/lib/python3.8/site-packages/narwhals/functions.py b/venv/lib/python3.8/site-packages/narwhals/functions.py
new file mode 100644
index 0000000..b483236
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/functions.py
@@ -0,0 +1,1793 @@
+from __future__ import annotations
+
+import platform
+import sys
+from importlib.metadata import version
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, cast
+
+from narwhals._expression_parsing import (
+ ExprKind,
+ ExprMetadata,
+ apply_n_ary_operation,
+ combine_metadata,
+ extract_compliant,
+ is_scalar_like,
+)
+from narwhals._utils import (
+ Implementation,
+ Version,
+ deprecate_native_namespace,
+ flatten,
+ is_compliant_expr,
+ is_eager_allowed,
+ is_sequence_but_not_str,
+ parse_version,
+ supports_arrow_c_stream,
+ validate_laziness,
+)
+from narwhals.dependencies import (
+ is_narwhals_series,
+ is_numpy_array,
+ is_numpy_array_2d,
+ is_pyarrow_table,
+)
+from narwhals.exceptions import InvalidOperationError, ShapeError
+from narwhals.expr import Expr
+from narwhals.translate import from_native, to_native
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._compliant import CompliantExpr, CompliantNamespace
+ from narwhals._translate import IntoArrowTable
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.series import Series
+ from narwhals.typing import (
+ ConcatMethod,
+ FrameT,
+ IntoDType,
+ IntoExpr,
+ IntoSeriesT,
+ NativeFrame,
+ NativeLazyFrame,
+ NativeSeries,
+ NonNestedLiteral,
+ _1DArray,
+ _2DArray,
+ )
+
+ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None"
+
+
+def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT:
+    """Concatenate multiple DataFrames or LazyFrames into a single entity.
+
+    Arguments:
+        items: DataFrames or LazyFrames to concatenate.
+        how: concatenation strategy
+
+ - vertical: Concatenate vertically. Column names must match.
+ - horizontal: Concatenate horizontally. If lengths don't match, then
+ missing rows are filled with null values. This is only supported
+ when all inputs are (eager) DataFrames.
+ - diagonal: Finds a union between the column schemas and fills missing column
+ values with null.
+
+ Returns:
+ A new DataFrame or LazyFrame resulting from the concatenation.
+
+ Raises:
+ TypeError: The items to concatenate should either all be eager, or all lazy
+
+ Examples:
+ Let's take an example of vertical concatenation:
+
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+
+        Let's look at one case for vertical concatenation (pandas-backed):
+
+ >>> df_pd_1 = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
+ >>> df_pd_2 = nw.from_native(pd.DataFrame({"a": [5, 2], "b": [1, 4]}))
+ >>> nw.concat([df_pd_1, df_pd_2], how="vertical")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ | 2 3 6 |
+ | 0 5 1 |
+ | 1 2 4 |
+ └──────────────────┘
+
+        Let's look at one case for horizontal concatenation (polars-backed):
+
+ >>> df_pl_1 = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
+ >>> df_pl_2 = nw.from_native(pl.DataFrame({"c": [5, 2], "d": [1, 4]}))
+ >>> nw.concat([df_pl_1, df_pl_2], how="horizontal")
+ ┌───────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------|
+ |shape: (3, 4) |
+ |┌─────┬─────┬──────┬──────┐|
+ |│ a ┆ b ┆ c ┆ d │|
+ |│ --- ┆ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪═════╪══════╪══════╡|
+ |│ 1 ┆ 4 ┆ 5 ┆ 1 │|
+ |│ 2 ┆ 5 ┆ 2 ┆ 4 │|
+ |│ 3 ┆ 6 ┆ null ┆ null │|
+ |└─────┴─────┴──────┴──────┘|
+ └───────────────────────────┘
+
+        Let's look at one case for diagonal concatenation (pyarrow-backed):
+
+ >>> df_pa_1 = nw.from_native(pa.table({"a": [1, 2], "b": [3.5, 4.5]}))
+ >>> df_pa_2 = nw.from_native(pa.table({"a": [3, 4], "z": ["x", "y"]}))
+ >>> nw.concat([df_pa_1, df_pa_2], how="diagonal")
+ ┌──────────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |b: double |
+ |z: string |
+ |---- |
+ |a: [[1,2],[3,4]] |
+ |b: [[3.5,4.5],[null,null]]|
+ |z: [[null,null],["x","y"]]|
+ └──────────────────────────┘
+ """
+ from narwhals.dependencies import is_narwhals_lazyframe
+
+ if not items:
+ msg = "No items to concatenate."
+ raise ValueError(msg)
+ items = list(items)
+ validate_laziness(items)
+ if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover
+ msg = "Only vertical, horizontal and diagonal concatenations are supported."
+ raise NotImplementedError(msg)
+ first_item = items[0]
+ if is_narwhals_lazyframe(first_item) and how == "horizontal":
+ msg = (
+ "Horizontal concatenation is not supported for LazyFrames.\n\n"
+ "Hint: you may want to use `join` instead."
+ )
+ raise InvalidOperationError(msg)
+ plx = first_item.__narwhals_namespace__()
+ return first_item._with_compliant(
+ plx.concat([df._compliant_frame for df in items], how=how)
+ )
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def new_series(
+ name: str,
+ values: Any,
+ dtype: IntoDType | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> Series[Any]:
+    """Instantiate a Narwhals Series from an iterable (e.g. list or array).
+
+ Arguments:
+ name: Name of resulting Series.
+        values: Values to make the Series from.
+ dtype: (Narwhals) dtype. If not provided, the native library
+ may auto-infer it from `values`.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new Series
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> values = [4, 1, 2, 3]
+ >>> nw.new_series(name="a", values=values, dtype=nw.Int32, backend=pd)
+ ┌─────────────────────┐
+ | Narwhals Series |
+ |---------------------|
+ |0 4 |
+ |1 1 |
+ |2 2 |
+ |3 3 |
+ |Name: a, dtype: int32|
+ └─────────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ return _new_series_impl(name, values, dtype, backend=backend)
+
+
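+# Internal helper for `new_series` (descriptive comment): dispatches to a compliant eager
+# namespace when the backend allows eager execution, defers to an unknown extension's own
+# `new_series` constructor otherwise, and rejects lazy-only backends with a hint.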
+def _new_series_impl(
+ name: str,
+ values: Any,
+ dtype: IntoDType | None = None,
+ *,
+ backend: ModuleType | Implementation | str,
+) -> Series[Any]:
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ series = ns._series.from_iterable(values, name=name, context=ns, dtype=dtype)
+ return series.to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ native_series: NativeSeries = _native_namespace.new_series(
+ name, values, dtype
+ )
+ return from_native(native_series, series_only=True).alias(name)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `new_series` constructor."
+ raise AttributeError(msg) from e
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `new_series` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.new_series('a', [1,2,3], backend='pyarrow').to_frame().lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
+@deprecate_native_namespace(warn_version="1.26.0")
+def from_dict(
+ data: Mapping[str, Any],
+ schema: Mapping[str, DType] | Schema | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]:
+ """Instantiate DataFrame from dictionary.
+
+ Indexes (if present, for pandas-like backends) are aligned following
+ the [left-hand-rule](../concepts/pandas_index.md/).
+
+ Notes:
+ For pandas-like dataframes, conversion to schema is applied after dataframe
+ creation.
+
+ Arguments:
+ data: Dictionary to create DataFrame from.
+ schema: The DataFrame schema as Schema or dict of {name: type}. If not
+ specified, the schema will be inferred by the native library.
+        backend: specifies which eager backend to instantiate. Only
+ necessary if inputs are not Narwhals Series.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.26.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> data = {"c": [5, 2], "d": [1, 4]}
+ >>> nw.from_dict(data, backend="pandas")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | c d |
+ | 0 5 1 |
+ | 1 2 4 |
+ └──────────────────┘
+ """
+ if not data:
+ msg = "from_dict cannot be called with empty dictionary"
+ raise ValueError(msg)
+ if backend is None:
+ data, backend = _from_dict_no_backend(data)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns._dataframe.from_dict(data, schema=schema, context=ns).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `from_dict` function in the top-level namespace.
+ native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `from_dict` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_dict` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_dict({{'a': [1, 2]}}, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
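+# Internal helper for `from_dict` when no `backend` is given (descriptive comment): infers
+# the native namespace from the first Narwhals Series value and converts every Series value
+# back to its native representation before construction.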
+def _from_dict_no_backend(
+ data: Mapping[str, Series[Any] | Any], /
+) -> tuple[dict[str, Series[Any] | Any], ModuleType]:
+ for val in data.values():
+ if is_narwhals_series(val):
+ native_namespace = val.__native_namespace__()
+ break
+ else:
+ msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
+ raise TypeError(msg)
+ data = {key: to_native(value, pass_through=True) for key, value in data.items()}
+ return data, native_namespace
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_numpy(
+ data: _2DArray,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]:
+ """Construct a DataFrame from a NumPy ndarray.
+
+ Notes:
+ Only row orientation is currently supported.
+
+ For pandas-like dataframes, conversion to schema is applied after dataframe
+ creation.
+
+ Arguments:
+ data: Two-dimensional data represented as a NumPy ndarray.
+ schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import numpy as np
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> arr = np.array([[5, 2, 1], [1, 4, 3]])
+ >>> schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()}
+ >>> nw.from_numpy(arr, schema=schema, backend="pyarrow")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | c: int16 |
+ | d: float |
+ | e: int8 |
+ | ---- |
+ | c: [[5,1]] |
+ | d: [[2,4]] |
+ | e: [[1,3]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ if not is_numpy_array_2d(data):
+ msg = "`from_numpy` only accepts 2D numpy arrays"
+ raise ValueError(msg)
+ if not _is_into_schema(schema):
+ msg = (
+ "`schema` is expected to be one of the following types: "
+ "Mapping[str, DType] | Schema | Sequence[str]. "
+ f"Got {type(schema)}."
+ )
+ raise TypeError(msg)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns.from_numpy(data, schema).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `from_numpy` function in the top-level namespace.
+ native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `from_numpy` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_numpy` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_numpy(arr, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
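+# Type guard (descriptive comment): accepts `None`, a mapping of column names to dtypes, a
+# `Schema`, or a sequence of column names as a valid `schema` argument.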
+def _is_into_schema(obj: Any) -> TypeIs[_IntoSchema]:
+ from narwhals.schema import Schema
+
+ return (
+ obj is None or isinstance(obj, (Mapping, Schema)) or is_sequence_but_not_str(obj)
+ )
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_arrow(
+ native_frame: IntoArrowTable,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]: # pragma: no cover
+ """Construct a DataFrame from an object which supports the PyCapsule Interface.
+
+ Arguments:
+ native_frame: Object which implements `__arrow_c_stream__`.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4.2, 5.1]})
+ >>> nw.from_arrow(df_native, backend="polars")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 2) |
+ | ┌─────┬─────┐ |
+ | │ a ┆ b │ |
+ | │ --- ┆ --- │ |
+ | │ i64 ┆ f64 │ |
+ | ╞═════╪═════╡ |
+ | │ 1 ┆ 4.2 │ |
+ | │ 2 ┆ 5.1 │ |
+ | └─────┴─────┘ |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ if not (supports_arrow_c_stream(native_frame) or is_pyarrow_table(native_frame)):
+ msg = f"Given object of type {type(native_frame)} does not support PyCapsule interface"
+ raise TypeError(msg)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns._dataframe.from_arrow(native_frame, context=ns).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement PyCapsule support
+ native: NativeFrame = _native_namespace.DataFrame(native_frame)
+ except AttributeError as e:
+            msg = "Unknown namespace is expected to implement a `DataFrame` class which accepts objects supporting the PyCapsule Interface."
+ raise AttributeError(msg) from e
+ return from_native(native, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_arrow` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_arrow(df, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
+def _get_sys_info() -> dict[str, str]:
+ """System information.
+
+    Returns system and Python version information.
+
+    Copied from sklearn.
+
+ Returns:
+ Dictionary with system info.
+ """
+ python = sys.version.replace("\n", " ")
+
+ blob = (
+ ("python", python),
+ ("executable", sys.executable),
+ ("machine", platform.platform()),
+ )
+
+ return dict(blob)
+
+
+def _get_deps_info() -> dict[str, str]:
+ """Overview of the installed version of main dependencies.
+
+ This function does not import the modules to collect the version numbers
+ but instead relies on standard Python package metadata.
+
+    Returns version information on relevant Python libraries.
+
+    This function and `show_versions` were copied from sklearn and adapted.
+
+ Returns:
+ Mapping from dependency to version.
+ """
+ from importlib.metadata import PackageNotFoundError, version
+
+ from narwhals import __version__
+
+ deps = ("pandas", "polars", "cudf", "modin", "pyarrow", "numpy")
+ deps_info = {"narwhals": __version__}
+
+ for modname in deps:
+ try:
+ deps_info[modname] = version(modname)
+ except PackageNotFoundError: # noqa: PERF203
+ deps_info[modname] = ""
+ return deps_info
+
+
+def show_versions() -> None:
+ """Print useful debugging information.
+
+ Examples:
+ >>> from narwhals import show_versions
+ >>> show_versions() # doctest: +SKIP
+ """
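+    # Illustrative output shape only (values depend on the local environment):
+    #
+    # System:
+    #     python: <Python version string>
+    # executable: <path to the Python executable>
+    #    machine: <platform string>
+    #
+    # Python dependencies:
+    #      narwhals: <version>
+    #        pandas: <version, or empty if not installed>
+    #        ...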
+ sys_info = _get_sys_info()
+ deps_info = _get_deps_info()
+
+ print("\nSystem:") # noqa: T201
+ for k, stat in sys_info.items():
+ print(f"{k:>10}: {stat}") # noqa: T201
+
+ print("\nPython dependencies:") # noqa: T201
+ for k, stat in deps_info.items():
+ print(f"{k:>13}: {stat}") # noqa: T201
+
+
+def get_level(
+ obj: DataFrame[Any] | LazyFrame[Any] | Series[IntoSeriesT],
+) -> Literal["full", "lazy", "interchange"]:
+ """Level of support Narwhals has for current object.
+
+ Arguments:
+ obj: Dataframe or Series.
+
+ Returns:
+ This can be one of
+
+ - 'full': full Narwhals API support
+ - 'lazy': only lazy operations are supported. This excludes anything
+ which involves iterating over rows in Python.
+ - 'interchange': only metadata operations are supported (`df.schema`)
+ """
+ return obj._level
+
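+# Usage sketch (an assumption for illustration, not part of the upstream docstring):
+# a frame backed by a full-API library reports the "full" level, e.g.
+#
+#   import polars as pl
+#   import narwhals as nw
+#   nw.get_level(nw.from_native(pl.DataFrame({"a": [1]})))  # expected: "full"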
+
+@deprecate_native_namespace(warn_version="1.27.2", required=True)
+def read_csv(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> DataFrame[Any]:
+ """Read a CSV file into a DataFrame.
+
+ Arguments:
+ source: Path to a file.
+ backend: The eager backend for DataFrame creation.
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.27.2)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native CSV reader.
+ For example, you could use
+ `nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.
+
+ Returns:
+ DataFrame.
+
+ Examples:
+ >>> import narwhals as nw
+ >>> nw.read_csv("file.csv", backend="pandas") # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ eager_backend = Implementation.from_backend(backend)
+ native_namespace = eager_backend.to_native_namespace()
+ native_frame: NativeFrame
+ if eager_backend in {
+ Implementation.POLARS,
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ }:
+ native_frame = native_namespace.read_csv(source, **kwargs)
+ elif eager_backend is Implementation.PYARROW:
+ from pyarrow import csv # ignore-banned-import
+
+ native_frame = csv.read_csv(source, **kwargs)
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `read_csv` function in the top-level namespace.
+ native_frame = native_namespace.read_csv(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `read_csv` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_csv(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> LazyFrame[Any]:
+ """Lazily read from a CSV file.
+
+    For libraries that do not support lazy dataframes, the function reads
+    a CSV file eagerly and then converts the resulting dataframe to a lazyframe.
+
+ Arguments:
+ source: Path to a file.
+        backend: The backend to use for reading the CSV file (eager or lazy).
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native CSV reader.
+ For example, you could use
+ `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ LazyFrame.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>>
+ >>> nw.scan_csv("file.csv", backend="duckdb").to_native() # doctest:+SKIP
+ ┌─────────┬───────┐
+ │ a │ b │
+ │ varchar │ int32 │
+ ├─────────┼───────┤
+ │ x │ 1 │
+ │ y │ 2 │
+ │ z │ 3 │
+ └─────────┴───────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame | NativeLazyFrame
+ if implementation is Implementation.POLARS:
+ native_frame = native_namespace.scan_csv(source, **kwargs)
+ elif implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DASK,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_csv(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ from pyarrow import csv # ignore-banned-import
+
+ native_frame = csv.read_csv(source, **kwargs)
+ elif implementation.is_spark_like():
+ if (session := kwargs.pop("session", None)) is None:
+ msg = "Spark like backends require a session object to be passed in `kwargs`."
+ raise ValueError(msg)
+
+ csv_reader = session.read.format("csv")
+ native_frame = (
+ csv_reader.load(source)
+ if (
+ implementation is Implementation.SQLFRAME
+ and parse_version(version("sqlframe")) < (3, 27, 0)
+ )
+ else csv_reader.options(**kwargs).load(source)
+ )
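+        # Usage sketch for spark-like backends (an assumption, mirroring the
+        # `scan_parquet` docstring): the session is passed through `kwargs`, e.g.
+        #   nw.scan_csv("file.csv", backend="sqlframe", session=DuckDBSession())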
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `scan_csv` function in the top-level namespace.
+ native_frame = native_namespace.scan_csv(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `scan_csv` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame).lazy()
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def read_parquet(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> DataFrame[Any]:
+    """Read a parquet file into a DataFrame.
+
+ Arguments:
+ source: Path to a file.
+ backend: The eager backend for DataFrame creation.
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native parquet reader.
+ For example, you could use
+ `nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ DataFrame.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> nw.read_parquet("file.parquet", backend="pyarrow") # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |c: double |
+ |---- |
+ |a: [[1,2]] |
+ |c: [[0.2,0.1]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame
+ if implementation in {
+ Implementation.POLARS,
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_parquet(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ import pyarrow.parquet as pq # ignore-banned-import
+
+ native_frame = pq.read_table(source, **kwargs)
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `read_parquet` function in the top-level namespace.
+ native_frame = native_namespace.read_parquet(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `read_parquet` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_parquet(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> LazyFrame[Any]:
+ """Lazily read from a parquet file.
+
+    For libraries that do not support lazy dataframes, the function reads
+    a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
+
+ Note:
+        Spark-like backends require a session object to be passed in `kwargs`.
+
+ For instance:
+
+ ```py
+ import narwhals as nw
+ from sqlframe.duckdb import DuckDBSession
+
+ nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession())
+ ```
+
+ Arguments:
+ source: Path to a file.
+        backend: The backend to use for reading the parquet file (eager or lazy).
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN`, `CUDF`, `PYSPARK` or `SQLFRAME`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"`, `"cudf"`,
+ `"pyspark"` or `"sqlframe"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin`, `cudf`,
+ `pyspark.sql` or `sqlframe`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native parquet reader.
+ For example, you could use
+ `nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ LazyFrame.
+
+ Examples:
+ >>> import dask.dataframe as dd
+ >>> from sqlframe.duckdb import DuckDBSession
+ >>> import narwhals as nw
+ >>>
+ >>> nw.scan_parquet("file.parquet", backend="dask").collect() # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ └──────────────────┘
+ >>> nw.scan_parquet(
+ ... "file.parquet", backend="sqlframe", session=DuckDBSession()
+ ... ).collect() # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | ---- |
+ | a: [[1,2]] |
+ | b: [[4,5]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame | NativeLazyFrame
+ if implementation is Implementation.POLARS:
+ native_frame = native_namespace.scan_parquet(source, **kwargs)
+ elif implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DASK,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_parquet(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ import pyarrow.parquet as pq # ignore-banned-import
+
+ native_frame = pq.read_table(source, **kwargs)
+ elif implementation.is_spark_like():
+ if (session := kwargs.pop("session", None)) is None:
+ msg = "Spark like backends require a session object to be passed in `kwargs`."
+ raise ValueError(msg)
+
+ pq_reader = session.read.format("parquet")
+ native_frame = (
+ pq_reader.load(source)
+ if (
+ implementation is Implementation.SQLFRAME
+ and parse_version(version("sqlframe")) < (3, 27, 0)
+ )
+ else pq_reader.options(**kwargs).load(source)
+ )
+
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `scan_parquet` function in the top-level namespace.
+ native_frame = native_namespace.scan_parquet(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `scan_parquet` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame).lazy()
+
+
+def col(*names: str | Iterable[str]) -> Expr:
+ """Creates an expression that references one or more columns by their name(s).
+
+ Arguments:
+ names: Name(s) of the columns to use.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
+ >>> nw.from_native(df_native).select(nw.col("a", "b") * nw.col("b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 2) |
+ | ┌─────┬─────┐ |
+ | │ a ┆ b │ |
+ | │ --- ┆ --- │ |
+ | │ i64 ┆ i64 │ |
+ | ╞═════╪═════╡ |
+ | │ 3 ┆ 9 │ |
+ | │ 8 ┆ 16 │ |
+ | └─────┴─────┘ |
+ └──────────────────┘
+ """
+ flat_names = flatten(names)
+
+ def func(plx: Any) -> Any:
+ return plx.col(*flat_names)
+
+ return Expr(
+ func,
+ ExprMetadata.selector_single()
+ if len(flat_names) == 1
+ else ExprMetadata.selector_multi_named(),
+ )
+
+
+def exclude(*names: str | Iterable[str]) -> Expr:
+ """Creates an expression that excludes columns by their name(s).
+
+ Arguments:
+ names: Name(s) of the columns to exclude.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
+ >>> nw.from_native(df_native).select(nw.exclude("c", "a"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 1) |
+ | ┌─────┐ |
+ | │ b │ |
+ | │ --- │ |
+ | │ i64 │ |
+ | ╞═════╡ |
+ | │ 3 │ |
+ | │ 4 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+ exclude_names = frozenset(flatten(names))
+
+ def func(plx: Any) -> Any:
+ return plx.exclude(exclude_names)
+
+ return Expr(func, ExprMetadata.selector_multi_unnamed())
+
+
+def nth(*indices: int | Sequence[int]) -> Expr:
+ """Creates an expression that references one or more columns by their index(es).
+
+ Notes:
+        `nth` is not supported for Polars versions below 1.0.0. Please use
+ [`narwhals.col`][] instead.
+
+ Arguments:
+ indices: One or more indices representing the columns to retrieve.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 2], "b": [3, 4], "c": [0.123, 3.14]})
+ >>> nw.from_native(df_native).select(nw.nth(0, 2) * 2)
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |c: double |
+ |---- |
+ |a: [[2,4]] |
+ |c: [[0.246,6.28]] |
+ └──────────────────┘
+ """
+ flat_indices = flatten(indices)
+
+ def func(plx: Any) -> Any:
+ return plx.nth(*flat_indices)
+
+ return Expr(
+ func,
+ ExprMetadata.selector_single()
+ if len(flat_indices) == 1
+ else ExprMetadata.selector_multi_unnamed(),
+ )
+
+
+# Add underscore so it doesn't conflict with builtin `all`
+def all_() -> Expr:
+ """Instantiate an expression representing all columns.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [3.14, 0.123]})
+ >>> nw.from_native(df_native).select(nw.all() * 2)
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 2 6.280 |
+ | 1 4 0.246 |
+ └──────────────────┘
+ """
+ return Expr(lambda plx: plx.all(), ExprMetadata.selector_multi_unnamed())
+
+
+# Add underscore so it doesn't conflict with builtin `len`
+def len_() -> Expr:
+ """Return the number of rows.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [5, None]})
+ >>> nw.from_native(df_native).select(nw.len())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (1, 1) |
+ | ┌─────┐ |
+ | │ len │ |
+ | │ --- │ |
+ | │ u32 │ |
+ | ╞═════╡ |
+ | │ 2 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+
+ def func(plx: Any) -> Any:
+ return plx.len()
+
+ return Expr(func, ExprMetadata.aggregation())
+
+
+def sum(*columns: str) -> Expr:
+ """Sum all values.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).sum()``
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [-1.4, 6.2]})
+ >>> nw.from_native(df_native).select(nw.sum("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3 4.8 |
+ └──────────────────┘
+ """
+ return col(*columns).sum()
+
+
+def mean(*columns: str) -> Expr:
+ """Get the mean value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).mean()``
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 8, 3], "b": [3.14, 6.28, 42.1]})
+ >>> nw.from_native(df_native).select(nw.mean("a", "b"))
+ ┌─────────────────────────┐
+ | Narwhals DataFrame |
+ |-------------------------|
+ |pyarrow.Table |
+ |a: double |
+ |b: double |
+ |---- |
+ |a: [[4]] |
+ |b: [[17.173333333333336]]|
+ └─────────────────────────┘
+ """
+ return col(*columns).mean()
+
+
+def median(*columns: str) -> Expr:
+ """Get the median value.
+
+ Notes:
+ - Syntactic sugar for ``nw.col(columns).median()``
+ - Results might slightly differ across backends due to differences in the
+ underlying algorithms used to compute the median.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [4, 5, 2]})
+ >>> nw.from_native(df_native).select(nw.median("a"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (1, 1) |
+ | ┌─────┐ |
+ | │ a │ |
+ | │ --- │ |
+ | │ f64 │ |
+ | ╞═════╡ |
+ | │ 4.0 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+ return col(*columns).median()
+
+
+def min(*columns: str) -> Expr:
+ """Return the minimum value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).min()``.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 2], "b": [5, 10]})
+ >>> nw.from_native(df_native).select(nw.min("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | ---- |
+ | a: [[1]] |
+ | b: [[5]] |
+ └──────────────────┘
+ """
+ return col(*columns).min()
+
+
+def max(*columns: str) -> Expr:
+ """Return the maximum value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).max()``.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+ >>> nw.from_native(df_native).select(nw.max("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 2 10 |
+ └──────────────────┘
+ """
+ return col(*columns).max()
+
+
+def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Sum all values horizontally across columns.
+
+ Warning:
+ Unlike Polars, we support horizontal sum over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, None]})
+ >>> nw.from_native(df_native).with_columns(sum=nw.sum_horizontal("a", "b"))
+ ┌────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------|
+ |shape: (3, 3) |
+ |┌─────┬──────┬─────┐|
+ |│ a ┆ b ┆ sum │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪══════╪═════╡|
+ |│ 1 ┆ 5 ┆ 6 │|
+ |│ 2 ┆ 10 ┆ 12 │|
+ |│ 3 ┆ null ┆ 3 │|
+ |└─────┴──────┴─────┘|
+ └────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `sum_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.sum_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Get the minimum value horizontally across columns.
+
+ Notes:
+ We support `min_horizontal` over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 8, 3], "b": [4, 5, None]})
+ >>> nw.from_native(df_native).with_columns(h_min=nw.min_horizontal("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | h_min: int64 |
+ | ---- |
+ | a: [[1,8,3]] |
+ | b: [[4,5,null]] |
+ | h_min: [[1,5,3]] |
+ └──────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `min_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.min_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Get the maximum value horizontally across columns.
+
+ Notes:
+ We support `max_horizontal` over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, None]})
+ >>> nw.from_native(df_native).with_columns(h_max=nw.max_horizontal("a", "b"))
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ |shape: (3, 3) |
+ |┌─────┬──────┬───────┐|
+ |│ a ┆ b ┆ h_max │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪══════╪═══════╡|
+ |│ 1 ┆ 4 ┆ 4 │|
+ |│ 8 ┆ 5 ┆ 8 │|
+ |│ 3 ┆ null ┆ 3 │|
+ |└─────┴──────┴───────┘|
+ └──────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `max_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.max_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
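+# Intermediate builder returned by `when()` (descriptive comment): it stores the combined
+# predicate and exposes `.then(...)`, which produces a `Then` expression.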
+class When:
+ def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None:
+ self._predicate = all_horizontal(*flatten(predicates))
+
+ def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then:
+ kind = ExprKind.from_into_expr(value, str_as_lit=False)
+ if self._predicate._metadata.is_scalar_like and not kind.is_scalar_like:
+ msg = (
+ "If you pass a scalar-like predicate to `nw.when`, then "
+ "the `then` value must also be scalar-like."
+ )
+ raise ShapeError(msg)
+
+ return Then(
+ lambda plx: apply_n_ary_operation(
+ plx,
+ lambda *args: plx.when(args[0]).then(args[1]),
+ self._predicate,
+ value,
+ str_as_lit=False,
+ ),
+ combine_metadata(
+ self._predicate,
+ value,
+ str_as_lit=False,
+ allow_multi_output=False,
+ to_single_output=False,
+ ),
+ )
+
+
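+# Expression returned by `When.then` (descriptive comment): behaves like a regular `Expr`
+# and additionally exposes `.otherwise(...)` to complete the `when/then/otherwise` chain.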
+class Then(Expr):
+ def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr:
+ kind = ExprKind.from_into_expr(value, str_as_lit=False)
+ if self._metadata.is_scalar_like and not is_scalar_like(kind):
+ msg = (
+ "If you pass a scalar-like predicate to `nw.when`, then "
+ "the `otherwise` value must also be scalar-like."
+ )
+ raise ShapeError(msg)
+
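+        # If this `when/then` expression is row-wise but `value` is scalar-like, the
+        # compliant scalar is broadcast so the `otherwise` branch matches the frame length.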
+ def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
+ compliant_expr = self._to_compliant_expr(plx)
+ compliant_value = extract_compliant(plx, value, str_as_lit=False)
+ if (
+ not self._metadata.is_scalar_like
+ and is_scalar_like(kind)
+ and is_compliant_expr(compliant_value)
+ ):
+ compliant_value = compliant_value.broadcast(kind)
+ return compliant_expr.otherwise(compliant_value) # type: ignore[attr-defined, no-any-return]
+
+ return Expr(
+ func,
+ combine_metadata(
+ self,
+ value,
+ str_as_lit=False,
+ allow_multi_output=False,
+ to_single_output=False,
+ ),
+ )
+
+
+def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When:
+ """Start a `when-then-otherwise` expression.
+
+    Expression similar to an `if-else` statement in Python. Always initiated by a
+    `nw.when(<condition>).then(<value if condition>)`, optionally followed by an
+    `.otherwise(<value if condition is false>)` appended at the end. If `.otherwise` is
+    not appended and the condition is not `True`, `None` will be returned.
+
+ Info:
+ Chaining multiple `.when(<condition>).then(<value>)` statements is currently
+ not supported.
+ See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668).
+
+ Arguments:
+ predicates: Condition(s) that must be met in order to apply the subsequent
+ statement. Accepts one or more boolean expressions, which are implicitly
+ combined with `&`. String input is parsed as a column name.
+
+ Returns:
+ A "when" object, which `.then` can be called on.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]}
+ >>> df_native = pd.DataFrame(data)
+ >>> nw.from_native(df_native).with_columns(
+ ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b a_when |
+ | 0 1 5 5 |
+ | 1 2 10 5 |
+ | 2 3 15 6 |
+ └──────────────────┘
+ """
+ return When(*predicates)
+
+
+def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ r"""Compute the bitwise AND horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [False, False, True, True, False, None],
+ ... "b": [False, True, True, None, None, None],
+ ... }
+ >>> df_native = pa.table(data)
+ >>> nw.from_native(df_native).select("a", "b", all=nw.all_horizontal("a", "b"))
+ ┌─────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------|
+ |pyarrow.Table |
+ |a: bool |
+ |b: bool |
+ |all: bool |
+ |---- |
+ |a: [[false,false,true,true,false,null]] |
+ |b: [[false,true,true,null,null,null]] |
+ |all: [[false,false,true,null,false,null]]|
+ └─────────────────────────────────────────┘
+
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `all_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.all_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
+ """Return an expression representing a literal value.
+
+ Arguments:
+ value: The value to use as literal.
+ dtype: The data type of the literal value. If not provided, the data type will
+ be inferred by the native library.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2]})
+ >>> nw.from_native(df_native).with_columns(nw.lit(3))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a literal |
+ | 0 1 3 |
+ | 1 2 3 |
+ └──────────────────┘
+ """
+ if is_numpy_array(value):
+ msg = (
+ "numpy arrays are not supported as literal values. "
+ "Consider using `with_columns` to create a new column from the array."
+ )
+ raise ValueError(msg)
+
+ if isinstance(value, (list, tuple)):
+ msg = f"Nested datatypes are not supported yet. Got {value}"
+ raise NotImplementedError(msg)
+
+ return Expr(lambda plx: plx.lit(value, dtype), ExprMetadata.literal())
+
+
+def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ r"""Compute the bitwise OR horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [False, False, True, True, False, None],
+ ... "b": [False, True, True, None, None, None],
+ ... }
+ >>> df_native = pl.DataFrame(data)
+ >>> nw.from_native(df_native).select("a", "b", any=nw.any_horizontal("a", "b"))
+ ┌─────────────────────────┐
+ | Narwhals DataFrame |
+ |-------------------------|
+ |shape: (6, 3) |
+ |┌───────┬───────┬───────┐|
+ |│ a ┆ b ┆ any │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ bool ┆ bool ┆ bool │|
+ |╞═══════╪═══════╪═══════╡|
+ |│ false ┆ false ┆ false │|
+ |│ false ┆ true ┆ true │|
+ |│ true ┆ true ┆ true │|
+ |│ true ┆ null ┆ true │|
+ |│ false ┆ null ┆ null │|
+ |│ null ┆ null ┆ null │|
+ |└───────┴───────┴───────┘|
+ └─────────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `any_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.any_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Compute the mean of all values horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> data = {"a": [1, 8, 3], "b": [4, 5, None], "c": ["x", "y", "z"]}
+ >>> df_native = pa.table(data)
+
+        We can compute the horizontal mean of the "a" and "b" columns:
+
+ >>> nw.from_native(df_native).select(nw.mean_horizontal("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: double |
+ | ---- |
+ | a: [[2.5,6.5,3]] |
+ └──────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `mean_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.mean_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def concat_str(
+ exprs: IntoExpr | Iterable[IntoExpr],
+ *more_exprs: IntoExpr,
+ separator: str = "",
+ ignore_nulls: bool = False,
+) -> Expr:
+ r"""Horizontally concatenate columns into a single string column.
+
+ Arguments:
+ exprs: Columns to concatenate into a single string column. Accepts expression
+ input. Strings are parsed as column names, other non-expression inputs are
+ parsed as literals. Non-`String` columns are cast to `String`.
+ *more_exprs: Additional columns to concatenate into a single string column,
+ specified as positional arguments.
+ separator: String that will be used to separate the values of each column.
+        ignore_nulls: Ignore null values (default is `False`).
+            If set to `False`, null values are propagated: any row containing a null
+            value produces a null output.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [1, 2, 3],
+ ... "b": ["dogs", "cats", None],
+ ... "c": ["play", "swim", "walk"],
+ ... }
+ >>> df_native = pd.DataFrame(data)
+ >>> (
+ ... nw.from_native(df_native).select(
+ ... nw.concat_str(
+ ... [nw.col("a") * 2, nw.col("b"), nw.col("c")], separator=" "
+ ... ).alias("full_sentence")
+ ... )
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | full_sentence |
+ | 0 2 dogs play |
+ | 1 4 cats swim |
+ | 2 None |
+ └──────────────────┘
+ """
+ flat_exprs = flatten([*flatten([exprs]), *more_exprs])
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx,
+ lambda *args: plx.concat_str(
+ *args, separator=separator, ignore_nulls=ignore_nulls
+ ),
+ *flat_exprs,
+ str_as_lit=False,
+ ),
+ combine_metadata(
+ *flat_exprs, str_as_lit=False, allow_multi_output=True, to_single_output=True
+ ),
+ )