From 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e Mon Sep 17 00:00:00 2001
From: sotech117 <michael_foiani@brown.edu>
Date: Thu, 31 Jul 2025 17:27:24 -0400
Subject: add code for analysis of data

---
 .../site-packages/narwhals/_expression_parsing.py  | 609 +++++++++++++++++++++
 1 file changed, 609 insertions(+)
 create mode 100644 venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py

(limited to 'venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py')

diff --git a/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py b/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py
new file mode 100644
index 0000000..c442c3b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py
@@ -0,0 +1,609 @@
+# Utilities for expression parsing
+# Useful for backends which don't have any concept of expressions, such
+# and pandas or PyArrow.
+from __future__ import annotations
+
+from enum import Enum, auto
+from itertools import chain
+from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, cast
+
+from narwhals._utils import is_compliant_expr
+from narwhals.dependencies import is_narwhals_series, is_numpy_array
+from narwhals.exceptions import (
+    InvalidOperationError,
+    LengthChangingExprError,
+    MultiOutputExpressionError,
+    ShapeError,
+)
+
+if TYPE_CHECKING:
+    from typing_extensions import Never, TypeIs
+
+    from narwhals._compliant import CompliantExpr, CompliantFrameT
+    from narwhals._compliant.typing import (
+        AliasNames,
+        CompliantExprAny,
+        CompliantFrameAny,
+        CompliantNamespaceAny,
+        EagerNamespaceAny,
+        EvalNames,
+    )
+    from narwhals.expr import Expr
+    from narwhals.series import Series
+    from narwhals.typing import IntoExpr, NonNestedLiteral, _1DArray
+
+    T = TypeVar("T")
+
+
+def is_expr(obj: Any) -> TypeIs[Expr]:
+    """Check whether `obj` is a Narwhals Expr."""
+    from narwhals.expr import Expr
+
+    return isinstance(obj, Expr)
+
+
+def is_series(obj: Any) -> TypeIs[Series[Any]]:
+    """Check whether `obj` is a Narwhals Expr."""
+    from narwhals.series import Series
+
+    return isinstance(obj, Series)
+
+
+def combine_evaluate_output_names(
+    *exprs: CompliantExpr[CompliantFrameT, Any],
+) -> EvalNames[CompliantFrameT]:
+    # Follow left-hand-rule for naming. E.g. `nw.sum_horizontal(expr1, expr2)` takes the
+    # first name of `expr1`.
+    if not is_compliant_expr(exprs[0]):  # pragma: no cover
+        msg = f"Safety assertion failed, expected expression, got: {type(exprs[0])}. Please report a bug."
+        raise AssertionError(msg)
+
+    def evaluate_output_names(df: CompliantFrameT) -> Sequence[str]:
+        return exprs[0]._evaluate_output_names(df)[:1]
+
+    return evaluate_output_names
+
+
+def combine_alias_output_names(*exprs: CompliantExprAny) -> AliasNames | None:
+    # Follow left-hand-rule for naming. E.g. `nw.sum_horizontal(expr1.alias(alias), expr2)` takes the
+    # aliasing function of `expr1` and apply it to the first output name of `expr1`.
+    if exprs[0]._alias_output_names is None:
+        return None
+
+    def alias_output_names(names: Sequence[str]) -> Sequence[str]:
+        return exprs[0]._alias_output_names(names)[:1]  # type: ignore[misc]
+
+    return alias_output_names
+
+
+def extract_compliant(
+    plx: CompliantNamespaceAny,
+    other: IntoExpr | NonNestedLiteral | _1DArray,
+    *,
+    str_as_lit: bool,
+) -> CompliantExprAny | NonNestedLiteral:
+    if is_expr(other):
+        return other._to_compliant_expr(plx)
+    if isinstance(other, str) and not str_as_lit:
+        return plx.col(other)
+    if is_narwhals_series(other):
+        return other._compliant_series._to_expr()
+    if is_numpy_array(other):
+        ns = cast("EagerNamespaceAny", plx)
+        return ns._series.from_numpy(other, context=ns)._to_expr()
+    return other
+
+
+def evaluate_output_names_and_aliases(
+    expr: CompliantExprAny, df: CompliantFrameAny, exclude: Sequence[str]
+) -> tuple[Sequence[str], Sequence[str]]:
+    output_names = expr._evaluate_output_names(df)
+    aliases = (
+        output_names
+        if expr._alias_output_names is None
+        else expr._alias_output_names(output_names)
+    )
+    if exclude:
+        assert expr._metadata is not None  # noqa: S101
+        if expr._metadata.expansion_kind.is_multi_unnamed():
+            output_names, aliases = zip(
+                *[
+                    (x, alias)
+                    for x, alias in zip(output_names, aliases)
+                    if x not in exclude
+                ]
+            )
+    return output_names, aliases
+
+
+class ExprKind(Enum):
+    """Describe which kind of expression we are dealing with."""
+
+    LITERAL = auto()
+    """e.g. `nw.lit(1)`"""
+
+    AGGREGATION = auto()
+    """Reduces to a single value, not affected by row order, e.g. `nw.col('a').mean()`"""
+
+    ORDERABLE_AGGREGATION = auto()
+    """Reduces to a single value, affected by row order, e.g. `nw.col('a').arg_max()`"""
+
+    ELEMENTWISE = auto()
+    """Preserves length, can operate without context for surrounding rows, e.g. `nw.col('a').abs()`."""
+
+    ORDERABLE_WINDOW = auto()
+    """Depends on the rows around it and on their order, e.g. `diff`."""
+
+    UNORDERABLE_WINDOW = auto()
+    """Depends on the rows around it but not on their order, e.g. `rank`."""
+
+    FILTRATION = auto()
+    """Changes length, not affected by row order, e.g. `drop_nulls`."""
+
+    ORDERABLE_FILTRATION = auto()
+    """Changes length, affected by row order, e.g. `tail`."""
+
+    NARY = auto()
+    """Results from the combination of multiple expressions."""
+
+    OVER = auto()
+    """Results from calling `.over` on expression."""
+
+    UNKNOWN = auto()
+    """Based on the information we have, we can't determine the ExprKind."""
+
+    @property
+    def is_scalar_like(self) -> bool:
+        return self in {ExprKind.LITERAL, ExprKind.AGGREGATION}
+
+    @property
+    def is_orderable_window(self) -> bool:
+        return self in {ExprKind.ORDERABLE_WINDOW, ExprKind.ORDERABLE_AGGREGATION}
+
+    @classmethod
+    def from_expr(cls, obj: Expr) -> ExprKind:
+        meta = obj._metadata
+        if meta.is_literal:
+            return ExprKind.LITERAL
+        if meta.is_scalar_like:
+            return ExprKind.AGGREGATION
+        if meta.is_elementwise:
+            return ExprKind.ELEMENTWISE
+        return ExprKind.UNKNOWN
+
+    @classmethod
+    def from_into_expr(
+        cls, obj: IntoExpr | NonNestedLiteral | _1DArray, *, str_as_lit: bool
+    ) -> ExprKind:
+        if is_expr(obj):
+            return cls.from_expr(obj)
+        if (
+            is_narwhals_series(obj)
+            or is_numpy_array(obj)
+            or (isinstance(obj, str) and not str_as_lit)
+        ):
+            return ExprKind.ELEMENTWISE
+        return ExprKind.LITERAL
+
+
+def is_scalar_like(
+    obj: ExprKind,
+) -> TypeIs[Literal[ExprKind.LITERAL, ExprKind.AGGREGATION]]:
+    return obj.is_scalar_like
+
+
+class ExpansionKind(Enum):
+    """Describe what kind of expansion the expression performs."""
+
+    SINGLE = auto()
+    """e.g. `nw.col('a'), nw.sum_horizontal(nw.all())`"""
+
+    MULTI_NAMED = auto()
+    """e.g. `nw.col('a', 'b')`"""
+
+    MULTI_UNNAMED = auto()
+    """e.g. `nw.all()`, nw.nth(0, 1)"""
+
+    def is_multi_unnamed(self) -> bool:
+        return self is ExpansionKind.MULTI_UNNAMED
+
+    def is_multi_output(self) -> bool:
+        return self in {ExpansionKind.MULTI_NAMED, ExpansionKind.MULTI_UNNAMED}
+
+    def __and__(self, other: ExpansionKind) -> Literal[ExpansionKind.MULTI_UNNAMED]:
+        if self is ExpansionKind.MULTI_UNNAMED and other is ExpansionKind.MULTI_UNNAMED:
+            # e.g. nw.selectors.all() - nw.selectors.numeric().
+            return ExpansionKind.MULTI_UNNAMED
+        # Don't attempt anything more complex, keep it simple and raise in the face of ambiguity.
+        msg = f"Unsupported ExpansionKind combination, got {self} and {other}, please report a bug."  # pragma: no cover
+        raise AssertionError(msg)  # pragma: no cover
+
+
+class ExprMetadata:
+    __slots__ = (
+        "expansion_kind",
+        "has_windows",
+        "is_elementwise",
+        "is_literal",
+        "is_scalar_like",
+        "last_node",
+        "n_orderable_ops",
+        "preserves_length",
+    )
+
+    def __init__(
+        self,
+        expansion_kind: ExpansionKind,
+        last_node: ExprKind,
+        *,
+        has_windows: bool = False,
+        n_orderable_ops: int = 0,
+        preserves_length: bool = True,
+        is_elementwise: bool = True,
+        is_scalar_like: bool = False,
+        is_literal: bool = False,
+    ) -> None:
+        if is_literal:
+            assert is_scalar_like  # noqa: S101  # debug assertion
+        if is_elementwise:
+            assert preserves_length  # noqa: S101  # debug assertion
+        self.expansion_kind: ExpansionKind = expansion_kind
+        self.last_node: ExprKind = last_node
+        self.has_windows: bool = has_windows
+        self.n_orderable_ops: int = n_orderable_ops
+        self.is_elementwise: bool = is_elementwise
+        self.preserves_length: bool = preserves_length
+        self.is_scalar_like: bool = is_scalar_like
+        self.is_literal: bool = is_literal
+
+    def __init_subclass__(cls, /, *args: Any, **kwds: Any) -> Never:  # pragma: no cover
+        msg = f"Cannot subclass {cls.__name__!r}"
+        raise TypeError(msg)
+
+    def __repr__(self) -> str:  # pragma: no cover
+        return (
+            f"ExprMetadata(\n"
+            f"  expansion_kind: {self.expansion_kind},\n"
+            f"  last_node: {self.last_node},\n"
+            f"  has_windows: {self.has_windows},\n"
+            f"  n_orderable_ops: {self.n_orderable_ops},\n"
+            f"  is_elementwise: {self.is_elementwise},\n"
+            f"  preserves_length: {self.preserves_length},\n"
+            f"  is_scalar_like: {self.is_scalar_like},\n"
+            f"  is_literal: {self.is_literal},\n"
+            ")"
+        )
+
+    @property
+    def is_filtration(self) -> bool:
+        return not self.preserves_length and not self.is_scalar_like
+
+    def with_aggregation(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply aggregations to scalar-like expressions."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.AGGREGATION,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops,
+            preserves_length=False,
+            is_elementwise=False,
+            is_scalar_like=True,
+            is_literal=False,
+        )
+
+    def with_orderable_aggregation(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply aggregations to scalar-like expressions."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.ORDERABLE_AGGREGATION,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops + 1,
+            preserves_length=False,
+            is_elementwise=False,
+            is_scalar_like=True,
+            is_literal=False,
+        )
+
+    def with_elementwise_op(self) -> ExprMetadata:
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.ELEMENTWISE,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops,
+            preserves_length=self.preserves_length,
+            is_elementwise=self.is_elementwise,
+            is_scalar_like=self.is_scalar_like,
+            is_literal=self.is_literal,
+        )
+
+    def with_unorderable_window(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply unorderable window (`rank`, `is_unique`) to scalar-like expression."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.UNORDERABLE_WINDOW,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops,
+            preserves_length=self.preserves_length,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    def with_orderable_window(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply orderable window (e.g. `diff`, `shift`) to scalar-like expression."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.ORDERABLE_WINDOW,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops + 1,
+            preserves_length=self.preserves_length,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    def with_ordered_over(self) -> ExprMetadata:
+        if self.has_windows:
+            msg = "Cannot nest `over` statements."
+            raise InvalidOperationError(msg)
+        if self.is_elementwise or self.is_filtration:
+            msg = (
+                "Cannot use `over` on expressions which are elementwise\n"
+                "(e.g. `abs`) or which change length (e.g. `drop_nulls`)."
+            )
+            raise InvalidOperationError(msg)
+        n_orderable_ops = self.n_orderable_ops
+        if not n_orderable_ops:
+            msg = "Cannot use `order_by` in `over` on expression which isn't orderable."
+            raise InvalidOperationError(msg)
+        if self.last_node.is_orderable_window:
+            n_orderable_ops -= 1
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.OVER,
+            has_windows=True,
+            n_orderable_ops=n_orderable_ops,
+            preserves_length=True,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    def with_partitioned_over(self) -> ExprMetadata:
+        if self.has_windows:
+            msg = "Cannot nest `over` statements."
+            raise InvalidOperationError(msg)
+        if self.is_elementwise or self.is_filtration:
+            msg = (
+                "Cannot use `over` on expressions which are elementwise\n"
+                "(e.g. `abs`) or which change length (e.g. `drop_nulls`)."
+            )
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.OVER,
+            has_windows=True,
+            n_orderable_ops=self.n_orderable_ops,
+            preserves_length=True,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    def with_filtration(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply filtration (e.g. `drop_nulls`) to scalar-like expression."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.FILTRATION,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops,
+            preserves_length=False,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    def with_orderable_filtration(self) -> ExprMetadata:
+        if self.is_scalar_like:
+            msg = "Can't apply filtration (e.g. `drop_nulls`) to scalar-like expression."
+            raise InvalidOperationError(msg)
+        return ExprMetadata(
+            self.expansion_kind,
+            ExprKind.ORDERABLE_FILTRATION,
+            has_windows=self.has_windows,
+            n_orderable_ops=self.n_orderable_ops + 1,
+            preserves_length=False,
+            is_elementwise=False,
+            is_scalar_like=False,
+            is_literal=False,
+        )
+
+    @staticmethod
+    def aggregation() -> ExprMetadata:
+        return ExprMetadata(
+            ExpansionKind.SINGLE,
+            ExprKind.AGGREGATION,
+            is_elementwise=False,
+            preserves_length=False,
+            is_scalar_like=True,
+        )
+
+    @staticmethod
+    def literal() -> ExprMetadata:
+        return ExprMetadata(
+            ExpansionKind.SINGLE,
+            ExprKind.LITERAL,
+            is_elementwise=False,
+            preserves_length=False,
+            is_literal=True,
+            is_scalar_like=True,
+        )
+
+    @staticmethod
+    def selector_single() -> ExprMetadata:
+        # e.g. `nw.col('a')`, `nw.nth(0)`
+        return ExprMetadata(ExpansionKind.SINGLE, ExprKind.ELEMENTWISE)
+
+    @staticmethod
+    def selector_multi_named() -> ExprMetadata:
+        # e.g. `nw.col('a', 'b')`
+        return ExprMetadata(ExpansionKind.MULTI_NAMED, ExprKind.ELEMENTWISE)
+
+    @staticmethod
+    def selector_multi_unnamed() -> ExprMetadata:
+        # e.g. `nw.all()`
+        return ExprMetadata(ExpansionKind.MULTI_UNNAMED, ExprKind.ELEMENTWISE)
+
+    @classmethod
+    def from_binary_op(cls, lhs: Expr, rhs: IntoExpr, /) -> ExprMetadata:
+        # We may be able to allow multi-output rhs in the future:
+        # https://github.com/narwhals-dev/narwhals/issues/2244.
+        return combine_metadata(
+            lhs, rhs, str_as_lit=True, allow_multi_output=False, to_single_output=False
+        )
+
+    @classmethod
+    def from_horizontal_op(cls, *exprs: IntoExpr) -> ExprMetadata:
+        return combine_metadata(
+            *exprs, str_as_lit=False, allow_multi_output=True, to_single_output=True
+        )
+
+
+def combine_metadata(  # noqa: C901, PLR0912
+    *args: IntoExpr | object | None,
+    str_as_lit: bool,
+    allow_multi_output: bool,
+    to_single_output: bool,
+) -> ExprMetadata:
+    """Combine metadata from `args`.
+
+    Arguments:
+        args: Arguments, maybe expressions, literals, or Series.
+        str_as_lit: Whether to interpret strings as literals or as column names.
+        allow_multi_output: Whether to allow multi-output inputs.
+        to_single_output: Whether the result is always single-output, regardless
+            of the inputs (e.g. `nw.sum_horizontal`).
+    """
+    n_filtrations = 0
+    result_expansion_kind = ExpansionKind.SINGLE
+    result_has_windows = False
+    result_n_orderable_ops = 0
+    # result preserves length if at least one input does
+    result_preserves_length = False
+    # result is elementwise if all inputs are elementwise
+    result_is_not_elementwise = False
+    # result is scalar-like if all inputs are scalar-like
+    result_is_not_scalar_like = False
+    # result is literal if all inputs are literal
+    result_is_not_literal = False
+
+    for i, arg in enumerate(args):  # noqa: PLR1702
+        if (isinstance(arg, str) and not str_as_lit) or is_series(arg):
+            result_preserves_length = True
+            result_is_not_scalar_like = True
+            result_is_not_literal = True
+        elif is_expr(arg):
+            metadata = arg._metadata
+            if metadata.expansion_kind.is_multi_output():
+                expansion_kind = metadata.expansion_kind
+                if i > 0 and not allow_multi_output:
+                    # Left-most argument is always allowed to be multi-output.
+                    msg = (
+                        "Multi-output expressions (e.g. nw.col('a', 'b'), nw.all()) "
+                        "are not supported in this context."
+                    )
+                    raise MultiOutputExpressionError(msg)
+                if not to_single_output:
+                    if i == 0:
+                        result_expansion_kind = expansion_kind
+                    else:
+                        result_expansion_kind = result_expansion_kind & expansion_kind
+
+            if metadata.has_windows:
+                result_has_windows = True
+            result_n_orderable_ops += metadata.n_orderable_ops
+            if metadata.preserves_length:
+                result_preserves_length = True
+            if not metadata.is_elementwise:
+                result_is_not_elementwise = True
+            if not metadata.is_scalar_like:
+                result_is_not_scalar_like = True
+            if not metadata.is_literal:
+                result_is_not_literal = True
+            if metadata.is_filtration:
+                n_filtrations += 1
+
+    if n_filtrations > 1:
+        msg = "Length-changing expressions can only be used in isolation, or followed by an aggregation"
+        raise LengthChangingExprError(msg)
+    if result_preserves_length and n_filtrations:
+        msg = "Cannot combine length-changing expressions with length-preserving ones or aggregations"
+        raise ShapeError(msg)
+
+    return ExprMetadata(
+        result_expansion_kind,
+        ExprKind.NARY,
+        has_windows=result_has_windows,
+        n_orderable_ops=result_n_orderable_ops,
+        preserves_length=result_preserves_length,
+        is_elementwise=not result_is_not_elementwise,
+        is_scalar_like=not result_is_not_scalar_like,
+        is_literal=not result_is_not_literal,
+    )
+
+
+def check_expressions_preserve_length(*args: IntoExpr, function_name: str) -> None:
+    # Raise if any argument in `args` isn't length-preserving.
+    # For Series input, we don't raise (yet), we let such checks happen later,
+    # as this function works lazily and so can't evaluate lengths.
+    from narwhals.series import Series
+
+    if not all(
+        (is_expr(x) and x._metadata.preserves_length) or isinstance(x, (str, Series))
+        for x in args
+    ):
+        msg = f"Expressions which aggregate or change length cannot be passed to '{function_name}'."
+        raise ShapeError(msg)
+
+
+def all_exprs_are_scalar_like(*args: IntoExpr, **kwargs: IntoExpr) -> bool:
+    # Raise if any argument in `args` isn't an aggregation or literal.
+    # For Series input, we don't raise (yet), we let such checks happen later,
+    # as this function works lazily and so can't evaluate lengths.
+    exprs = chain(args, kwargs.values())
+    return all(is_expr(x) and x._metadata.is_scalar_like for x in exprs)
+
+
+def apply_n_ary_operation(
+    plx: CompliantNamespaceAny,
+    function: Any,
+    *comparands: IntoExpr | NonNestedLiteral | _1DArray,
+    str_as_lit: bool,
+) -> CompliantExprAny:
+    compliant_exprs = (
+        extract_compliant(plx, comparand, str_as_lit=str_as_lit)
+        for comparand in comparands
+    )
+    kinds = [
+        ExprKind.from_into_expr(comparand, str_as_lit=str_as_lit)
+        for comparand in comparands
+    ]
+
+    broadcast = any(not kind.is_scalar_like for kind in kinds)
+    compliant_exprs = (
+        compliant_expr.broadcast(kind)
+        if broadcast and is_compliant_expr(compliant_expr) and is_scalar_like(kind)
+        else compliant_expr
+        for compliant_expr, kind in zip(compliant_exprs, kinds)
+    )
+    return function(*compliant_exprs)
-- 
cgit v1.2.3-70-g09d2