path: root/venv/lib/python3.8/site-packages/narwhals/functions.py
author      sotech117 <michael_foiani@brown.edu>    2025-07-31 17:27:24 -0400
committer   sotech117 <michael_foiani@brown.edu>    2025-07-31 17:27:24 -0400
commit      5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree        8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/narwhals/functions.py
parent      b832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/narwhals/functions.py')
-rw-r--r--    venv/lib/python3.8/site-packages/narwhals/functions.py    1793
1 file changed, 1793 insertions, 0 deletions
diff --git a/venv/lib/python3.8/site-packages/narwhals/functions.py b/venv/lib/python3.8/site-packages/narwhals/functions.py
new file mode 100644
index 0000000..b483236
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/functions.py
@@ -0,0 +1,1793 @@
+from __future__ import annotations
+
+import platform
+import sys
+from importlib.metadata import version
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, cast
+
+from narwhals._expression_parsing import (
+ ExprKind,
+ ExprMetadata,
+ apply_n_ary_operation,
+ combine_metadata,
+ extract_compliant,
+ is_scalar_like,
+)
+from narwhals._utils import (
+ Implementation,
+ Version,
+ deprecate_native_namespace,
+ flatten,
+ is_compliant_expr,
+ is_eager_allowed,
+ is_sequence_but_not_str,
+ parse_version,
+ supports_arrow_c_stream,
+ validate_laziness,
+)
+from narwhals.dependencies import (
+ is_narwhals_series,
+ is_numpy_array,
+ is_numpy_array_2d,
+ is_pyarrow_table,
+)
+from narwhals.exceptions import InvalidOperationError, ShapeError
+from narwhals.expr import Expr
+from narwhals.translate import from_native, to_native
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._compliant import CompliantExpr, CompliantNamespace
+ from narwhals._translate import IntoArrowTable
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.series import Series
+ from narwhals.typing import (
+ ConcatMethod,
+ FrameT,
+ IntoDType,
+ IntoExpr,
+ IntoSeriesT,
+ NativeFrame,
+ NativeLazyFrame,
+ NativeSeries,
+ NonNestedLiteral,
+ _1DArray,
+ _2DArray,
+ )
+
+ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None"
+
+
+def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT:
+    """Concatenate multiple DataFrames or LazyFrames into a single entity.
+
+    Arguments:
+        items: DataFrames or LazyFrames to concatenate.
+        how: concatenation strategy
+
+ - vertical: Concatenate vertically. Column names must match.
+ - horizontal: Concatenate horizontally. If lengths don't match, then
+ missing rows are filled with null values. This is only supported
+ when all inputs are (eager) DataFrames.
+ - diagonal: Finds a union between the column schemas and fills missing column
+ values with null.
+
+ Returns:
+ A new DataFrame or LazyFrame resulting from the concatenation.
+
+ Raises:
+ TypeError: The items to concatenate should either all be eager, or all lazy
+
+ Examples:
+ Let's take an example of vertical concatenation:
+
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+
+        Let's look at one case for vertical concatenation (pandas-backed):
+
+ >>> df_pd_1 = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
+ >>> df_pd_2 = nw.from_native(pd.DataFrame({"a": [5, 2], "b": [1, 4]}))
+ >>> nw.concat([df_pd_1, df_pd_2], how="vertical")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ | 2 3 6 |
+ | 0 5 1 |
+ | 1 2 4 |
+ └──────────────────┘
+
+        Let's look at one case for horizontal concatenation (polars-backed):
+
+ >>> df_pl_1 = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
+ >>> df_pl_2 = nw.from_native(pl.DataFrame({"c": [5, 2], "d": [1, 4]}))
+ >>> nw.concat([df_pl_1, df_pl_2], how="horizontal")
+ ┌───────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------|
+ |shape: (3, 4) |
+ |┌─────┬─────┬──────┬──────┐|
+ |│ a ┆ b ┆ c ┆ d │|
+ |│ --- ┆ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪═════╪══════╪══════╡|
+ |│ 1 ┆ 4 ┆ 5 ┆ 1 │|
+ |│ 2 ┆ 5 ┆ 2 ┆ 4 │|
+ |│ 3 ┆ 6 ┆ null ┆ null │|
+ |└─────┴─────┴──────┴──────┘|
+ └───────────────────────────┘
+
+        Let's look at one case for diagonal concatenation (pyarrow-backed):
+
+ >>> df_pa_1 = nw.from_native(pa.table({"a": [1, 2], "b": [3.5, 4.5]}))
+ >>> df_pa_2 = nw.from_native(pa.table({"a": [3, 4], "z": ["x", "y"]}))
+ >>> nw.concat([df_pa_1, df_pa_2], how="diagonal")
+ ┌──────────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |b: double |
+ |z: string |
+ |---- |
+ |a: [[1,2],[3,4]] |
+ |b: [[3.5,4.5],[null,null]]|
+ |z: [[null,null],["x","y"]]|
+ └──────────────────────────┘
+ """
+ from narwhals.dependencies import is_narwhals_lazyframe
+
+ if not items:
+ msg = "No items to concatenate."
+ raise ValueError(msg)
+ items = list(items)
+ validate_laziness(items)
+ if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover
+ msg = "Only vertical, horizontal and diagonal concatenations are supported."
+ raise NotImplementedError(msg)
+ first_item = items[0]
+ if is_narwhals_lazyframe(first_item) and how == "horizontal":
+ msg = (
+ "Horizontal concatenation is not supported for LazyFrames.\n\n"
+ "Hint: you may want to use `join` instead."
+ )
+ raise InvalidOperationError(msg)
+ plx = first_item.__narwhals_namespace__()
+ return first_item._with_compliant(
+ plx.concat([df._compliant_frame for df in items], how=how)
+ )
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def new_series(
+ name: str,
+ values: Any,
+ dtype: IntoDType | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> Series[Any]:
+    """Instantiate a Narwhals Series from an iterable (e.g. list or array).
+
+ Arguments:
+ name: Name of resulting Series.
+        values: Values to make the Series from.
+ dtype: (Narwhals) dtype. If not provided, the native library
+ may auto-infer it from `values`.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new Series
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> values = [4, 1, 2, 3]
+ >>> nw.new_series(name="a", values=values, dtype=nw.Int32, backend=pd)
+ ┌─────────────────────┐
+ | Narwhals Series |
+ |---------------------|
+ |0 4 |
+ |1 1 |
+ |2 2 |
+ |3 3 |
+ |Name: a, dtype: int32|
+ └─────────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ return _new_series_impl(name, values, dtype, backend=backend)
+
+
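+# Internal helper for `new_series` (descriptive comment): dispatches to a compliant eager
+# namespace when the backend allows eager execution, defers to an unknown extension's own
+# `new_series` constructor otherwise, and rejects lazy-only backends with a hint.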
+def _new_series_impl(
+ name: str,
+ values: Any,
+ dtype: IntoDType | None = None,
+ *,
+ backend: ModuleType | Implementation | str,
+) -> Series[Any]:
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ series = ns._series.from_iterable(values, name=name, context=ns, dtype=dtype)
+ return series.to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ native_series: NativeSeries = _native_namespace.new_series(
+ name, values, dtype
+ )
+ return from_native(native_series, series_only=True).alias(name)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `new_series` constructor."
+ raise AttributeError(msg) from e
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `new_series` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.new_series('a', [1,2,3], backend='pyarrow').to_frame().lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
+@deprecate_native_namespace(warn_version="1.26.0")
+def from_dict(
+ data: Mapping[str, Any],
+ schema: Mapping[str, DType] | Schema | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]:
+ """Instantiate DataFrame from dictionary.
+
+ Indexes (if present, for pandas-like backends) are aligned following
+ the [left-hand-rule](../concepts/pandas_index.md/).
+
+ Notes:
+ For pandas-like dataframes, conversion to schema is applied after dataframe
+ creation.
+
+ Arguments:
+ data: Dictionary to create DataFrame from.
+ schema: The DataFrame schema as Schema or dict of {name: type}. If not
+ specified, the schema will be inferred by the native library.
+        backend: specifies which eager backend to instantiate. Only
+ necessary if inputs are not Narwhals Series.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.26.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> data = {"c": [5, 2], "d": [1, 4]}
+ >>> nw.from_dict(data, backend="pandas")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | c d |
+ | 0 5 1 |
+ | 1 2 4 |
+ └──────────────────┘
+ """
+ if not data:
+ msg = "from_dict cannot be called with empty dictionary"
+ raise ValueError(msg)
+ if backend is None:
+ data, backend = _from_dict_no_backend(data)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns._dataframe.from_dict(data, schema=schema, context=ns).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `from_dict` function in the top-level namespace.
+ native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `from_dict` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_dict` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_dict({{'a': [1, 2]}}, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
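+# Internal helper for `from_dict` when no `backend` is given (descriptive comment): infers
+# the native namespace from the first Narwhals Series value and converts every Series value
+# back to its native representation before construction.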
+def _from_dict_no_backend(
+ data: Mapping[str, Series[Any] | Any], /
+) -> tuple[dict[str, Series[Any] | Any], ModuleType]:
+ for val in data.values():
+ if is_narwhals_series(val):
+ native_namespace = val.__native_namespace__()
+ break
+ else:
+ msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
+ raise TypeError(msg)
+ data = {key: to_native(value, pass_through=True) for key, value in data.items()}
+ return data, native_namespace
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_numpy(
+ data: _2DArray,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]:
+ """Construct a DataFrame from a NumPy ndarray.
+
+ Notes:
+ Only row orientation is currently supported.
+
+ For pandas-like dataframes, conversion to schema is applied after dataframe
+ creation.
+
+ Arguments:
+ data: Two-dimensional data represented as a NumPy ndarray.
+ schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import numpy as np
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> arr = np.array([[5, 2, 1], [1, 4, 3]])
+ >>> schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()}
+ >>> nw.from_numpy(arr, schema=schema, backend="pyarrow")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | c: int16 |
+ | d: float |
+ | e: int8 |
+ | ---- |
+ | c: [[5,1]] |
+ | d: [[2,4]] |
+ | e: [[1,3]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ if not is_numpy_array_2d(data):
+ msg = "`from_numpy` only accepts 2D numpy arrays"
+ raise ValueError(msg)
+ if not _is_into_schema(schema):
+ msg = (
+ "`schema` is expected to be one of the following types: "
+ "Mapping[str, DType] | Schema | Sequence[str]. "
+ f"Got {type(schema)}."
+ )
+ raise TypeError(msg)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns.from_numpy(data, schema).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `from_numpy` function in the top-level namespace.
+ native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `from_numpy` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_numpy` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_numpy(arr, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
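+# Type guard (descriptive comment): accepts `None`, a mapping of column names to dtypes, a
+# `Schema`, or a sequence of column names as a valid `schema` argument.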
+def _is_into_schema(obj: Any) -> TypeIs[_IntoSchema]:
+ from narwhals.schema import Schema
+
+ return (
+ obj is None or isinstance(obj, (Mapping, Schema)) or is_sequence_but_not_str(obj)
+ )
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_arrow(
+ native_frame: IntoArrowTable,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None, # noqa: ARG001
+) -> DataFrame[Any]: # pragma: no cover
+ """Construct a DataFrame from an object which supports the PyCapsule Interface.
+
+ Arguments:
+ native_frame: Object which implements `__arrow_c_stream__`.
+        backend: specifies which eager backend to instantiate.
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+
+ Returns:
+ A new DataFrame.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4.2, 5.1]})
+ >>> nw.from_arrow(df_native, backend="polars")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 2) |
+ | ┌─────┬─────┐ |
+ | │ a ┆ b │ |
+ | │ --- ┆ --- │ |
+ | │ i64 ┆ f64 │ |
+ | ╞═════╪═════╡ |
+ | │ 1 ┆ 4.2 │ |
+ | │ 2 ┆ 5.1 │ |
+ | └─────┴─────┘ |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ if not (supports_arrow_c_stream(native_frame) or is_pyarrow_table(native_frame)):
+ msg = f"Given object of type {type(native_frame)} does not support PyCapsule interface"
+ raise TypeError(msg)
+ implementation = Implementation.from_backend(backend)
+ if is_eager_allowed(implementation):
+ ns = Version.MAIN.namespace.from_backend(implementation).compliant
+ return ns._dataframe.from_arrow(native_frame, context=ns).to_narwhals()
+ elif implementation is Implementation.UNKNOWN: # pragma: no cover
+ _native_namespace = implementation.to_native_namespace()
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement PyCapsule support
+ native: NativeFrame = _native_namespace.DataFrame(native_frame)
+ except AttributeError as e:
+            msg = "Unknown namespace is expected to implement a `DataFrame` class which accepts objects supporting the PyCapsule Interface."
+ raise AttributeError(msg) from e
+ return from_native(native, eager_only=True)
+ msg = (
+ f"{implementation} support in Narwhals is lazy-only, but `from_arrow` is an eager-only function.\n\n"
+ "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+ f" nw.from_arrow(df, backend='pyarrow').lazy('{implementation}')"
+ )
+ raise ValueError(msg)
+
+
+def _get_sys_info() -> dict[str, str]:
+ """System information.
+
+    Returns system and Python version information.
+
+    Copied from sklearn.
+
+ Returns:
+ Dictionary with system info.
+ """
+ python = sys.version.replace("\n", " ")
+
+ blob = (
+ ("python", python),
+ ("executable", sys.executable),
+ ("machine", platform.platform()),
+ )
+
+ return dict(blob)
+
+
+def _get_deps_info() -> dict[str, str]:
+ """Overview of the installed version of main dependencies.
+
+ This function does not import the modules to collect the version numbers
+ but instead relies on standard Python package metadata.
+
+    Returns version information on relevant Python libraries.
+
+    This function and `show_versions` were copied from sklearn and adapted.
+
+ Returns:
+ Mapping from dependency to version.
+ """
+ from importlib.metadata import PackageNotFoundError, version
+
+ from narwhals import __version__
+
+ deps = ("pandas", "polars", "cudf", "modin", "pyarrow", "numpy")
+ deps_info = {"narwhals": __version__}
+
+ for modname in deps:
+ try:
+ deps_info[modname] = version(modname)
+ except PackageNotFoundError: # noqa: PERF203
+ deps_info[modname] = ""
+ return deps_info
+
+
+def show_versions() -> None:
+ """Print useful debugging information.
+
+ Examples:
+ >>> from narwhals import show_versions
+ >>> show_versions() # doctest: +SKIP
+ """
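+    # Illustrative output shape only (values depend on the local environment):
+    #
+    # System:
+    #     python: <Python version string>
+    # executable: <path to the Python executable>
+    #    machine: <platform string>
+    #
+    # Python dependencies:
+    #      narwhals: <version>
+    #        pandas: <version, or empty if not installed>
+    #        ...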
+ sys_info = _get_sys_info()
+ deps_info = _get_deps_info()
+
+ print("\nSystem:") # noqa: T201
+ for k, stat in sys_info.items():
+ print(f"{k:>10}: {stat}") # noqa: T201
+
+ print("\nPython dependencies:") # noqa: T201
+ for k, stat in deps_info.items():
+ print(f"{k:>13}: {stat}") # noqa: T201
+
+
+def get_level(
+ obj: DataFrame[Any] | LazyFrame[Any] | Series[IntoSeriesT],
+) -> Literal["full", "lazy", "interchange"]:
+ """Level of support Narwhals has for current object.
+
+ Arguments:
+ obj: Dataframe or Series.
+
+ Returns:
+ This can be one of
+
+ - 'full': full Narwhals API support
+ - 'lazy': only lazy operations are supported. This excludes anything
+ which involves iterating over rows in Python.
+ - 'interchange': only metadata operations are supported (`df.schema`)
+ """
+ return obj._level
+
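+# Usage sketch (an assumption for illustration, not part of the upstream docstring):
+# a frame backed by a full-API library reports the "full" level, e.g.
+#
+#   import polars as pl
+#   import narwhals as nw
+#   nw.get_level(nw.from_native(pl.DataFrame({"a": [1]})))  # expected: "full"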
+
+@deprecate_native_namespace(warn_version="1.27.2", required=True)
+def read_csv(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> DataFrame[Any]:
+ """Read a CSV file into a DataFrame.
+
+ Arguments:
+ source: Path to a file.
+ backend: The eager backend for DataFrame creation.
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.27.2)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native CSV reader.
+ For example, you could use
+ `nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.
+
+ Returns:
+ DataFrame.
+
+ Examples:
+ >>> import narwhals as nw
+ >>> nw.read_csv("file.csv", backend="pandas") # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ eager_backend = Implementation.from_backend(backend)
+ native_namespace = eager_backend.to_native_namespace()
+ native_frame: NativeFrame
+ if eager_backend in {
+ Implementation.POLARS,
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ }:
+ native_frame = native_namespace.read_csv(source, **kwargs)
+ elif eager_backend is Implementation.PYARROW:
+ from pyarrow import csv # ignore-banned-import
+
+ native_frame = csv.read_csv(source, **kwargs)
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `read_csv` function in the top-level namespace.
+ native_frame = native_namespace.read_csv(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `read_csv` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_csv(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> LazyFrame[Any]:
+ """Lazily read from a CSV file.
+
+    For libraries that do not support lazy dataframes, the function reads
+    a CSV file eagerly and then converts the resulting dataframe to a lazyframe.
+
+ Arguments:
+ source: Path to a file.
+        backend: The backend to use for reading the CSV file (eager or lazy).
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native CSV reader.
+ For example, you could use
+ `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ LazyFrame.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>>
+ >>> nw.scan_csv("file.csv", backend="duckdb").to_native() # doctest:+SKIP
+ ┌─────────┬───────┐
+ │ a │ b │
+ │ varchar │ int32 │
+ ├─────────┼───────┤
+ │ x │ 1 │
+ │ y │ 2 │
+ │ z │ 3 │
+ └─────────┴───────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame | NativeLazyFrame
+ if implementation is Implementation.POLARS:
+ native_frame = native_namespace.scan_csv(source, **kwargs)
+ elif implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DASK,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_csv(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ from pyarrow import csv # ignore-banned-import
+
+ native_frame = csv.read_csv(source, **kwargs)
+ elif implementation.is_spark_like():
+ if (session := kwargs.pop("session", None)) is None:
+ msg = "Spark like backends require a session object to be passed in `kwargs`."
+ raise ValueError(msg)
+
+ csv_reader = session.read.format("csv")
+ native_frame = (
+ csv_reader.load(source)
+ if (
+ implementation is Implementation.SQLFRAME
+ and parse_version(version("sqlframe")) < (3, 27, 0)
+ )
+ else csv_reader.options(**kwargs).load(source)
+ )
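+        # Usage sketch for spark-like backends (an assumption, mirroring the
+        # `scan_parquet` docstring): the session is passed through `kwargs`, e.g.
+        #   nw.scan_csv("file.csv", backend="sqlframe", session=DuckDBSession())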
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `scan_csv` function in the top-level namespace.
+ native_frame = native_namespace.scan_csv(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `scan_csv` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame).lazy()
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def read_parquet(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> DataFrame[Any]:
+    """Read a parquet file into a DataFrame.
+
+ Arguments:
+ source: Path to a file.
+ backend: The eager backend for DataFrame creation.
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN` or `CUDF`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native parquet reader.
+ For example, you could use
+ `nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ DataFrame.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> nw.read_parquet("file.parquet", backend="pyarrow") # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |c: double |
+ |---- |
+ |a: [[1,2]] |
+ |c: [[0.2,0.1]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame
+ if implementation in {
+ Implementation.POLARS,
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_parquet(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ import pyarrow.parquet as pq # ignore-banned-import
+
+ native_frame = pq.read_table(source, **kwargs)
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `read_parquet` function in the top-level namespace.
+ native_frame = native_namespace.read_parquet(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `read_parquet` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_parquet(
+ source: str,
+ *,
+ backend: ModuleType | Implementation | str | None = None,
+ native_namespace: ModuleType | None = None,
+ **kwargs: Any,
+) -> LazyFrame[Any]:
+ """Lazily read from a parquet file.
+
+    For libraries that do not support lazy dataframes, the function reads
+    a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
+
+ Note:
+        Spark-like backends require a session object to be passed in `kwargs`.
+
+ For instance:
+
+ ```py
+ import narwhals as nw
+ from sqlframe.duckdb import DuckDBSession
+
+ nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession())
+ ```
+
+ Arguments:
+ source: Path to a file.
+        backend: The backend to use for reading the parquet file (eager or lazy).
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+ `POLARS`, `MODIN`, `CUDF`, `PYSPARK` or `SQLFRAME`.
+ - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"`, `"cudf"`,
+ `"pyspark"` or `"sqlframe"`.
+ - Directly as a module `pandas`, `pyarrow`, `polars`, `modin`, `cudf`,
+ `pyspark.sql` or `sqlframe`.
+ native_namespace: The native library to use for DataFrame creation.
+
+ *Deprecated* (v1.31.0)
+
+ Please use `backend` instead. Note that `native_namespace` is still available
+ (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+ see [perfect backwards compatibility policy](../backcompat.md/).
+ kwargs: Extra keyword arguments which are passed to the native parquet reader.
+ For example, you could use
+ `nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+ Returns:
+ LazyFrame.
+
+ Examples:
+ >>> import dask.dataframe as dd
+ >>> from sqlframe.duckdb import DuckDBSession
+ >>> import narwhals as nw
+ >>>
+ >>> nw.scan_parquet("file.parquet", backend="dask").collect() # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ └──────────────────┘
+ >>> nw.scan_parquet(
+ ... "file.parquet", backend="sqlframe", session=DuckDBSession()
+ ... ).collect() # doctest:+SKIP
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | ---- |
+ | a: [[1,2]] |
+ | b: [[4,5]] |
+ └──────────────────┘
+ """
+ backend = cast("ModuleType | Implementation | str", backend)
+ implementation = Implementation.from_backend(backend)
+ native_namespace = implementation.to_native_namespace()
+ native_frame: NativeFrame | NativeLazyFrame
+ if implementation is Implementation.POLARS:
+ native_frame = native_namespace.scan_parquet(source, **kwargs)
+ elif implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ Implementation.DASK,
+ Implementation.DUCKDB,
+ Implementation.IBIS,
+ }:
+ native_frame = native_namespace.read_parquet(source, **kwargs)
+ elif implementation is Implementation.PYARROW:
+ import pyarrow.parquet as pq # ignore-banned-import
+
+ native_frame = pq.read_table(source, **kwargs)
+ elif implementation.is_spark_like():
+ if (session := kwargs.pop("session", None)) is None:
+ msg = "Spark like backends require a session object to be passed in `kwargs`."
+ raise ValueError(msg)
+
+ pq_reader = session.read.format("parquet")
+ native_frame = (
+ pq_reader.load(source)
+ if (
+ implementation is Implementation.SQLFRAME
+ and parse_version(version("sqlframe")) < (3, 27, 0)
+ )
+ else pq_reader.options(**kwargs).load(source)
+ )
+
+ else: # pragma: no cover
+ try:
+ # implementation is UNKNOWN, Narwhals extension using this feature should
+ # implement `scan_parquet` function in the top-level namespace.
+ native_frame = native_namespace.scan_parquet(source=source, **kwargs)
+ except AttributeError as e:
+ msg = "Unknown namespace is expected to implement `scan_parquet` function."
+ raise AttributeError(msg) from e
+ return from_native(native_frame).lazy()
+
+
+def col(*names: str | Iterable[str]) -> Expr:
+ """Creates an expression that references one or more columns by their name(s).
+
+ Arguments:
+ names: Name(s) of the columns to use.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
+ >>> nw.from_native(df_native).select(nw.col("a", "b") * nw.col("b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 2) |
+ | ┌─────┬─────┐ |
+ | │ a ┆ b │ |
+ | │ --- ┆ --- │ |
+ | │ i64 ┆ i64 │ |
+ | ╞═════╪═════╡ |
+ | │ 3 ┆ 9 │ |
+ | │ 8 ┆ 16 │ |
+ | └─────┴─────┘ |
+ └──────────────────┘
+ """
+ flat_names = flatten(names)
+
+ def func(plx: Any) -> Any:
+ return plx.col(*flat_names)
+
+ return Expr(
+ func,
+ ExprMetadata.selector_single()
+ if len(flat_names) == 1
+ else ExprMetadata.selector_multi_named(),
+ )
+
+
+def exclude(*names: str | Iterable[str]) -> Expr:
+ """Creates an expression that excludes columns by their name(s).
+
+ Arguments:
+ names: Name(s) of the columns to exclude.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
+ >>> nw.from_native(df_native).select(nw.exclude("c", "a"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (2, 1) |
+ | ┌─────┐ |
+ | │ b │ |
+ | │ --- │ |
+ | │ i64 │ |
+ | ╞═════╡ |
+ | │ 3 │ |
+ | │ 4 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+ exclude_names = frozenset(flatten(names))
+
+ def func(plx: Any) -> Any:
+ return plx.exclude(exclude_names)
+
+ return Expr(func, ExprMetadata.selector_multi_unnamed())
+
+
+def nth(*indices: int | Sequence[int]) -> Expr:
+ """Creates an expression that references one or more columns by their index(es).
+
+ Notes:
+        `nth` is not supported for Polars versions below 1.0.0. Please use
+ [`narwhals.col`][] instead.
+
+ Arguments:
+ indices: One or more indices representing the columns to retrieve.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 2], "b": [3, 4], "c": [0.123, 3.14]})
+ >>> nw.from_native(df_native).select(nw.nth(0, 2) * 2)
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ |pyarrow.Table |
+ |a: int64 |
+ |c: double |
+ |---- |
+ |a: [[2,4]] |
+ |c: [[0.246,6.28]] |
+ └──────────────────┘
+ """
+ flat_indices = flatten(indices)
+
+ def func(plx: Any) -> Any:
+ return plx.nth(*flat_indices)
+
+ return Expr(
+ func,
+ ExprMetadata.selector_single()
+ if len(flat_indices) == 1
+ else ExprMetadata.selector_multi_unnamed(),
+ )
+
+
+# Add underscore so it doesn't conflict with builtin `all`
+def all_() -> Expr:
+ """Instantiate an expression representing all columns.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [3.14, 0.123]})
+ >>> nw.from_native(df_native).select(nw.all() * 2)
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 2 6.280 |
+ | 1 4 0.246 |
+ └──────────────────┘
+ """
+ return Expr(lambda plx: plx.all(), ExprMetadata.selector_multi_unnamed())
+
+
+# Add underscore so it doesn't conflict with builtin `len`
+def len_() -> Expr:
+ """Return the number of rows.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2], "b": [5, None]})
+ >>> nw.from_native(df_native).select(nw.len())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (1, 1) |
+ | ┌─────┐ |
+ | │ len │ |
+ | │ --- │ |
+ | │ u32 │ |
+ | ╞═════╡ |
+ | │ 2 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+
+ def func(plx: Any) -> Any:
+ return plx.len()
+
+ return Expr(func, ExprMetadata.aggregation())
+
+
+def sum(*columns: str) -> Expr:
+ """Sum all values.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).sum()``
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [-1.4, 6.2]})
+ >>> nw.from_native(df_native).select(nw.sum("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3 4.8 |
+ └──────────────────┘
+ """
+ return col(*columns).sum()
+
+
+def mean(*columns: str) -> Expr:
+ """Get the mean value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).mean()``
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 8, 3], "b": [3.14, 6.28, 42.1]})
+ >>> nw.from_native(df_native).select(nw.mean("a", "b"))
+ ┌─────────────────────────┐
+ | Narwhals DataFrame |
+ |-------------------------|
+ |pyarrow.Table |
+ |a: double |
+ |b: double |
+ |---- |
+ |a: [[4]] |
+ |b: [[17.173333333333336]]|
+ └─────────────────────────┘
+ """
+ return col(*columns).mean()
+
+
+def median(*columns: str) -> Expr:
+ """Get the median value.
+
+ Notes:
+ - Syntactic sugar for ``nw.col(columns).median()``
+ - Results might slightly differ across backends due to differences in the
+ underlying algorithms used to compute the median.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [4, 5, 2]})
+ >>> nw.from_native(df_native).select(nw.median("a"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | shape: (1, 1) |
+ | ┌─────┐ |
+ | │ a │ |
+ | │ --- │ |
+ | │ f64 │ |
+ | ╞═════╡ |
+ | │ 4.0 │ |
+ | └─────┘ |
+ └──────────────────┘
+ """
+ return col(*columns).median()
+
+
+def min(*columns: str) -> Expr:
+ """Return the minimum value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).min()``.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 2], "b": [5, 10]})
+ >>> nw.from_native(df_native).select(nw.min("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | ---- |
+ | a: [[1]] |
+ | b: [[5]] |
+ └──────────────────┘
+ """
+ return col(*columns).min()
+
+
+def max(*columns: str) -> Expr:
+ """Return the maximum value.
+
+ Note:
+ Syntactic sugar for ``nw.col(columns).max()``.
+
+ Arguments:
+ columns: Name(s) of the columns to use in the aggregation function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+ >>> nw.from_native(df_native).select(nw.max("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 2 10 |
+ └──────────────────┘
+ """
+ return col(*columns).max()
+
+
+def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Sum all values horizontally across columns.
+
+ Warning:
+ Unlike Polars, we support horizontal sum over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, None]})
+ >>> nw.from_native(df_native).with_columns(sum=nw.sum_horizontal("a", "b"))
+ ┌────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------|
+ |shape: (3, 3) |
+ |┌─────┬──────┬─────┐|
+ |│ a ┆ b ┆ sum │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪══════╪═════╡|
+ |│ 1 ┆ 5 ┆ 6 │|
+ |│ 2 ┆ 10 ┆ 12 │|
+ |│ 3 ┆ null ┆ 3 │|
+ |└─────┴──────┴─────┘|
+ └────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `sum_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.sum_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Get the minimum value horizontally across columns.
+
+ Notes:
+ We support `min_horizontal` over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 8, 3], "b": [4, 5, None]})
+ >>> nw.from_native(df_native).with_columns(h_min=nw.min_horizontal("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int64 |
+ | b: int64 |
+ | h_min: int64 |
+ | ---- |
+ | a: [[1,8,3]] |
+ | b: [[4,5,null]] |
+ | h_min: [[1,5,3]] |
+ └──────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `min_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.min_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Get the maximum value horizontally across columns.
+
+ Notes:
+ We support `max_horizontal` over numeric columns only.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, None]})
+ >>> nw.from_native(df_native).with_columns(h_max=nw.max_horizontal("a", "b"))
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ |shape: (3, 3) |
+ |┌─────┬──────┬───────┐|
+ |│ a ┆ b ┆ h_max │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ i64 ┆ i64 ┆ i64 │|
+ |╞═════╪══════╪═══════╡|
+ |│ 1 ┆ 4 ┆ 4 │|
+ |│ 8 ┆ 5 ┆ 8 │|
+ |│ 3 ┆ null ┆ 3 │|
+ |└─────┴──────┴───────┘|
+ └──────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `max_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.max_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
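+# Intermediate builder returned by `when()` (descriptive comment): it stores the combined
+# predicate and exposes `.then(...)`, which produces a `Then` expression.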
+class When:
+ def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None:
+ self._predicate = all_horizontal(*flatten(predicates))
+
+ def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then:
+ kind = ExprKind.from_into_expr(value, str_as_lit=False)
+ if self._predicate._metadata.is_scalar_like and not kind.is_scalar_like:
+ msg = (
+ "If you pass a scalar-like predicate to `nw.when`, then "
+ "the `then` value must also be scalar-like."
+ )
+ raise ShapeError(msg)
+
+ return Then(
+ lambda plx: apply_n_ary_operation(
+ plx,
+ lambda *args: plx.when(args[0]).then(args[1]),
+ self._predicate,
+ value,
+ str_as_lit=False,
+ ),
+ combine_metadata(
+ self._predicate,
+ value,
+ str_as_lit=False,
+ allow_multi_output=False,
+ to_single_output=False,
+ ),
+ )
+
+
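+# Expression returned by `When.then` (descriptive comment): behaves like a regular `Expr`
+# and additionally exposes `.otherwise(...)` to complete the `when/then/otherwise` chain.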
+class Then(Expr):
+ def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr:
+ kind = ExprKind.from_into_expr(value, str_as_lit=False)
+ if self._metadata.is_scalar_like and not is_scalar_like(kind):
+ msg = (
+ "If you pass a scalar-like predicate to `nw.when`, then "
+ "the `otherwise` value must also be scalar-like."
+ )
+ raise ShapeError(msg)
+
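+        # If this `when/then` expression is row-wise but `value` is scalar-like, the
+        # compliant scalar is broadcast so the `otherwise` branch matches the frame length.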
+ def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
+ compliant_expr = self._to_compliant_expr(plx)
+ compliant_value = extract_compliant(plx, value, str_as_lit=False)
+ if (
+ not self._metadata.is_scalar_like
+ and is_scalar_like(kind)
+ and is_compliant_expr(compliant_value)
+ ):
+ compliant_value = compliant_value.broadcast(kind)
+ return compliant_expr.otherwise(compliant_value) # type: ignore[attr-defined, no-any-return]
+
+ return Expr(
+ func,
+ combine_metadata(
+ self,
+ value,
+ str_as_lit=False,
+ allow_multi_output=False,
+ to_single_output=False,
+ ),
+ )
+
+
+def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When:
+ """Start a `when-then-otherwise` expression.
+
+    Expression similar to an `if-else` statement in Python. Always initiated by a
+    `nw.when(<condition>).then(<value if condition>)`, optionally followed by an
+    `.otherwise(<value if condition is false>)` appended at the end. If `.otherwise` is
+    not appended and the condition is not `True`, `None` will be returned.
+
+ Info:
+ Chaining multiple `.when(<condition>).then(<value>)` statements is currently
+ not supported.
+ See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668).
+
+ Arguments:
+ predicates: Condition(s) that must be met in order to apply the subsequent
+ statement. Accepts one or more boolean expressions, which are implicitly
+ combined with `&`. String input is parsed as a column name.
+
+ Returns:
+ A "when" object, which `.then` can be called on.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]}
+ >>> df_native = pd.DataFrame(data)
+ >>> nw.from_native(df_native).with_columns(
+ ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b a_when |
+ | 0 1 5 5 |
+ | 1 2 10 5 |
+ | 2 3 15 6 |
+ └──────────────────┘
+ """
+ return When(*predicates)
+
+
+def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ r"""Compute the bitwise AND horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [False, False, True, True, False, None],
+ ... "b": [False, True, True, None, None, None],
+ ... }
+ >>> df_native = pa.table(data)
+ >>> nw.from_native(df_native).select("a", "b", all=nw.all_horizontal("a", "b"))
+ ┌─────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------|
+ |pyarrow.Table |
+ |a: bool |
+ |b: bool |
+ |all: bool |
+ |---- |
+ |a: [[false,false,true,true,false,null]] |
+ |b: [[false,true,true,null,null,null]] |
+ |all: [[false,false,true,null,false,null]]|
+ └─────────────────────────────────────────┘
+
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `all_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.all_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
+ """Return an expression representing a literal value.
+
+ Arguments:
+ value: The value to use as literal.
+ dtype: The data type of the literal value. If not provided, the data type will
+ be inferred by the native library.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pd.DataFrame({"a": [1, 2]})
+ >>> nw.from_native(df_native).with_columns(nw.lit(3))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a literal |
+ | 0 1 3 |
+ | 1 2 3 |
+ └──────────────────┘
+ """
+ if is_numpy_array(value):
+ msg = (
+ "numpy arrays are not supported as literal values. "
+ "Consider using `with_columns` to create a new column from the array."
+ )
+ raise ValueError(msg)
+
+ if isinstance(value, (list, tuple)):
+ msg = f"Nested datatypes are not supported yet. Got {value}"
+ raise NotImplementedError(msg)
+
+ return Expr(lambda plx: plx.lit(value, dtype), ExprMetadata.literal())
+
+
+def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ r"""Compute the bitwise OR horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [False, False, True, True, False, None],
+ ... "b": [False, True, True, None, None, None],
+ ... }
+ >>> df_native = pl.DataFrame(data)
+ >>> nw.from_native(df_native).select("a", "b", any=nw.any_horizontal("a", "b"))
+ ┌─────────────────────────┐
+ | Narwhals DataFrame |
+ |-------------------------|
+ |shape: (6, 3) |
+ |┌───────┬───────┬───────┐|
+ |│ a ┆ b ┆ any │|
+ |│ --- ┆ --- ┆ --- │|
+ |│ bool ┆ bool ┆ bool │|
+ |╞═══════╪═══════╪═══════╡|
+ |│ false ┆ false ┆ false │|
+ |│ false ┆ true ┆ true │|
+ |│ true ┆ true ┆ true │|
+ |│ true ┆ null ┆ true │|
+ |│ false ┆ null ┆ null │|
+ |│ null ┆ null ┆ null │|
+ |└───────┴───────┴───────┘|
+ └─────────────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `any_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.any_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
+ """Compute the mean of all values horizontally across columns.
+
+ Arguments:
+ exprs: Name(s) of the columns to use in the aggregation function. Accepts
+ expression input.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> data = {"a": [1, 8, 3], "b": [4, 5, None], "c": ["x", "y", "z"]}
+ >>> df_native = pa.table(data)
+
+        We can compute the horizontal mean of the "a" and "b" columns:
+
+ >>> nw.from_native(df_native).select(nw.mean_horizontal("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: double |
+ | ---- |
+ | a: [[2.5,6.5,3]] |
+ └──────────────────┘
+ """
+ if not exprs:
+ msg = "At least one expression must be passed to `mean_horizontal`"
+ raise ValueError(msg)
+ flat_exprs = flatten(exprs)
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx, plx.mean_horizontal, *flat_exprs, str_as_lit=False
+ ),
+ ExprMetadata.from_horizontal_op(*flat_exprs),
+ )
+
+
+def concat_str(
+ exprs: IntoExpr | Iterable[IntoExpr],
+ *more_exprs: IntoExpr,
+ separator: str = "",
+ ignore_nulls: bool = False,
+) -> Expr:
+ r"""Horizontally concatenate columns into a single string column.
+
+ Arguments:
+ exprs: Columns to concatenate into a single string column. Accepts expression
+ input. Strings are parsed as column names, other non-expression inputs are
+ parsed as literals. Non-`String` columns are cast to `String`.
+ *more_exprs: Additional columns to concatenate into a single string column,
+ specified as positional arguments.
+ separator: String that will be used to separate the values of each column.
+        ignore_nulls: Ignore null values (default is `False`).
+            If set to `False`, null values are propagated: any row containing a null
+            value produces a null output.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> data = {
+ ... "a": [1, 2, 3],
+ ... "b": ["dogs", "cats", None],
+ ... "c": ["play", "swim", "walk"],
+ ... }
+ >>> df_native = pd.DataFrame(data)
+ >>> (
+ ... nw.from_native(df_native).select(
+ ... nw.concat_str(
+ ... [nw.col("a") * 2, nw.col("b"), nw.col("c")], separator=" "
+ ... ).alias("full_sentence")
+ ... )
+ ... )
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | full_sentence |
+ | 0 2 dogs play |
+ | 1 4 cats swim |
+ | 2 None |
+ └──────────────────┘
+ """
+ flat_exprs = flatten([*flatten([exprs]), *more_exprs])
+ return Expr(
+ lambda plx: apply_n_ary_operation(
+ plx,
+ lambda *args: plx.concat_str(
+ *args, separator=separator, ignore_nulls=ignore_nulls
+ ),
+ *flat_exprs,
+ str_as_lit=False,
+ ),
+ combine_metadata(
+ *flat_exprs, str_as_lit=False, allow_multi_output=True, to_single_output=True
+ ),
+ )