Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): improve ibis.struct() #9460

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ibis/backends/exasol/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class ExasolCompiler(SQLGlotCompiler):
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimeDelta,
ops.TimestampAdd,
ops.TimestampBucket,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.TimestampDiff,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimeDelta,
ops.DateDelta,
ops.TimestampDelta,
Expand Down
77 changes: 62 additions & 15 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
import ibis.expr.datatypes as dt
from ibis import util
from ibis.backends.tests.errors import (
ClickHouseDatabaseError,
PolarsColumnNotFoundError,
PsycoPg2InternalError,
PsycoPg2SyntaxError,
Py4JJavaError,
PySparkAnalysisException,
)
from ibis.common.annotations import ValidationError
from ibis.common.exceptions import IbisError

pytestmark = [
Expand All @@ -28,6 +30,62 @@
pytest.mark.notimpl(["datafusion", "druid", "oracle", "exasol"]),
]

# Shared marker for tests that build struct literals: on postgres these are
# marked not-implemented and are expected to raise PsycoPg2SyntaxError.
# Defined once here so individual tests can apply it as a decorator.
mark_notimpl_postgres_literals = pytest.mark.notimpl(
    "postgres", reason="struct literals not implemented", raises=PsycoPg2SyntaxError
)


@pytest.mark.notimpl("risingwave")
@pytest.mark.broken("postgres", reason="JSON handling is buggy")
@pytest.mark.notimpl(
    "flink",
    raises=Py4JJavaError,
    reason="Unexpected error in type inference logic of function 'COALESCE'",
)
def test_struct_factory(con):
    """Check each input form accepted by ``ibis.struct`` against a backend.

    The forms exercised are: a mapping, an existing struct expression, an
    iterable of ``(name, value)`` pairs, and the ``type=`` keyword applied
    to both a mapping and an existing struct expression.
    """
    as_ints = {"a": 1, "b": 2}
    as_strings = {"a": "1", "b": "2"}
    string_struct = "struct<a: string, b: string>"

    # Plain mapping input.
    from_mapping = ibis.struct({"a": 1, "b": 2})
    assert con.execute(from_mapping) == as_ints

    # Passing an existing struct expression back through the factory.
    from_expr = ibis.struct(from_mapping)
    assert con.execute(from_expr) == as_ints

    # Explicit type on a mapping: the integer fields come back as strings.
    cast_mapping = ibis.struct({"a": 1, "b": 2}, type=string_struct)
    assert con.execute(cast_mapping) == as_strings

    # Explicit type on an existing struct expression.
    cast_expr = ibis.struct(from_mapping, type=string_struct)
    assert con.execute(cast_expr) == as_strings

    # Iterable of (name, value) pairs.
    from_pairs = ibis.struct([("a", 1), ("b", 2)])
    assert con.execute(from_pairs) == as_ints


@pytest.mark.parametrize("type", ["struct<>", "struct<a: int64, b: int64>"])
@pytest.mark.parametrize("val", [{}, []])
def test_struct_factory_empty(val, type):
    """An empty collection of fields is rejected for every parametrized type."""
    expect_rejection = pytest.raises(ValidationError)
    with expect_rejection:
        ibis.struct(val, type=type)

Check warning on line 66 in ibis/backends/tests/test_struct.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/tests/test_struct.py#L66

Added line #L66 was not covered by tests


@pytest.mark.notimpl("risingwave")
@mark_notimpl_postgres_literals
@pytest.mark.notyet(
    "clickhouse", raises=ClickHouseDatabaseError, reason="nested types can't be NULL"
)
@pytest.mark.broken(
    "polars",
    reason=r"pl.lit(None, type='struct<a: int64>') gives {'a': None}: https://github.com/pola-rs/polars/issues/3462",
)
def test_struct_factory_null(con):
    """Check how ``ibis.struct`` treats ``None`` with and without a type.

    Without ``type`` the call must raise ``ValidationError``; with a struct
    type it must yield an expression of that type that executes to ``None``.
    """
    # No type given: the factory must refuse the bare None.
    with pytest.raises(ValidationError):
        ibis.struct(None)

    # With a type: the expression carries the parsed struct type and
    # evaluates to None on the backend.
    expected_dtype = dt.Struct(fields={"a": dt.float64, "b": dt.float64})
    null_struct = ibis.struct(None, type="struct<a: float64, b: float>")
    assert null_struct.type() == expected_dtype
    assert con.execute(null_struct) is None

    # Also execute a real (non-NULL) struct so that backends without struct
    # support fail here and xfail as the module-level markers expect.
    # Otherwise every other test in this file would need its own xfail mark.
    concrete = ibis.struct({"a": 1, "b": 2})
    assert con.execute(concrete) == {"a": 1, "b": 2}


@pytest.mark.notimpl(["dask"])
@pytest.mark.parametrize(
Expand Down Expand Up @@ -78,6 +136,9 @@

@pytest.mark.notimpl(["postgres", "risingwave"])
@pytest.mark.parametrize("field", ["a", "b", "c"])
@pytest.mark.notyet(
["flink"], reason="flink doesn't support creating struct columns from literals"
)
def test_literal(backend, con, field):
query = _STRUCT_LITERAL[field]
dtype = query.type().to_pandas()
Expand All @@ -87,7 +148,7 @@
backend.assert_series_equal(result, expected.astype(dtype))


@pytest.mark.notimpl(["postgres"])
@mark_notimpl_postgres_literals
@pytest.mark.parametrize("field", ["a", "b", "c"])
@pytest.mark.notyet(
["clickhouse"], reason="clickhouse doesn't support nullable nested types"
Expand Down Expand Up @@ -137,14 +198,6 @@
assert len(val.loc[result.group == "1"].iat[0]["key"]) == 730


@pytest.mark.notimpl(
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
)
@pytest.mark.notimpl(
["risingwave"],
reason="struct literals not implemented",
raises=PsycoPg2InternalError,
)
@pytest.mark.notimpl(["flink"], raises=Py4JJavaError, reason="not implemented in ibis")
def test_field_access_after_case(con):
s = ibis.struct({"a": 3})
Expand Down Expand Up @@ -240,12 +293,6 @@
raises=PolarsColumnNotFoundError,
reason="doesn't seem to support IN-style subqueries on structs",
)
@pytest.mark.notimpl(
# https://github.com/pandas-dev/pandas/issues/58909
["pandas", "dask"],
raises=TypeError,
reason="unhashable type: 'dict'",
)
@pytest.mark.xfail_version(
pyspark=["pyspark<3.5"],
reason="requires pyspark 3.5",
Expand Down
75 changes: 52 additions & 23 deletions ibis/expr/types/structs.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,33 @@
from __future__ import annotations

import collections
from keyword import iskeyword
from typing import TYPE_CHECKING

from public import public

import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.common.annotations import ValidationError
from ibis.common.deferred import deferrable
from ibis.common.exceptions import IbisError
from ibis.expr.types.generic import Column, Scalar, Value, literal
from ibis.expr.types.generic import Column, Scalar, Value

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, Sequence

import ibis.expr.datatypes as dt
import ibis.expr.types as ir
from ibis.expr.types.typing import V


@public
@deferrable
def struct(
value: Iterable[tuple[str, V]] | Mapping[str, V],
value: Iterable[tuple[str, V]]
| Mapping[str, V]
| StructValue
| ir.NullValue
| None,
*,
type: str | dt.DataType | None = None,
) -> StructValue:
"""Create a struct expression.
Expand All @@ -37,8 +42,7 @@
`(str, Value)`.
type
An instance of `ibis.expr.datatypes.DataType` or a string indicating
the Ibis type of `value`. This is only used if all of the input values
are Python literals. eg `struct<a: float, b: string>`.
the Ibis type of `value`. eg `struct<a: float, b: string>`.

Returns
-------
Expand Down Expand Up @@ -66,26 +70,51 @@
Create a struct column from a column and a scalar literal

>>> t = ibis.memtable({"a": [1, 2, 3]})
>>> ibis.struct([("a", t.a), ("b", "foo")])
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ StructColumn()
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ struct<a: int64, b: string> │
├─────────────────────────────┤
│ {'a': 1, 'b': 'foo'} │
│ {'a': 2, 'b': 'foo'} │
│ {'a': 3, 'b': 'foo'} │
└─────────────────────────────┘
>>> ibis.struct([("a", t.a), ("b", "foo")], type="struct<a: float, b: string>")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Cast(StructColumn(), struct<a: float64, b: string>)
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ struct<a: float64, b: string>
├─────────────────────────────────────────────────────
│ {'a': 1.0, 'b': 'foo'}
│ {'a': 2.0, 'b': 'foo'}
│ {'a': 3.0, 'b': 'foo'}
└─────────────────────────────────────────────────────
"""
import ibis.expr.operations as ops

type = dt.dtype(type) if type is not None else None
if type is not None and not isinstance(type, dt.Struct):
raise ValidationError(f"type must be an struct, got {type}")

Check warning on line 88 in ibis/expr/types/structs.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/types/structs.py#L88

Added line #L88 was not covered by tests

if isinstance(value, ir.Value):
if type is not None:
return value.cast(type)
elif isinstance(value, StructValue):
return value
else:
raise ValidationError(

Check warning on line 96 in ibis/expr/types/structs.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/types/structs.py#L96

Added line #L96 was not covered by tests
f"If no type passed, value must be a struct, got {value.type()}"
)

if value is None:
if type is None:
raise ValidationError("If values is None/NULL, type must be provided")
return ir.null(type)

fields = dict(value)
if any(isinstance(value, Value) for value in fields.values()):
names = tuple(fields.keys())
values = tuple(fields.values())
return ops.StructColumn(names=names, values=values).to_expr()
else:
return literal(collections.OrderedDict(fields), type=type)
if not fields:
raise ValidationError("Struct must have at least one field")

Check warning on line 107 in ibis/expr/types/structs.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/types/structs.py#L107

Added line #L107 was not covered by tests
names = fields.keys()
result = ops.StructColumn(names=names, values=fields.values()).to_expr()
if type is not None:
if not set(names).issuperset(type.names):
raise ValidationError(
f"The passed type requires fields {type.names}",
f" but only found fields {names}",
)
result = result.cast(type)
return result


@public
Expand Down
7 changes: 2 additions & 5 deletions ibis/tests/expr/test_literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.common.collections import frozendict
from ibis.common.annotations import ValidationError
from ibis.expr.operations import Literal
from ibis.tests.util import assert_pickle_roundtrip

Expand Down Expand Up @@ -109,9 +109,6 @@ def test_normalized_underlying_value(userinput, literal_type, expected_type):
def test_struct_literal(value):
typestr = "struct<field1: string, field2: float64>"
a = ibis.struct(value, type=typestr)
assert a.op().value == frozendict(
field1=str(value["field1"]), field2=float(value["field2"])
)
assert a.type() == dt.dtype(typestr)


Expand All @@ -123,7 +120,7 @@ def test_struct_literal(value):
],
)
def test_struct_literal_non_castable(value):
with pytest.raises(TypeError, match="Unable to normalize"):
with pytest.raises(ValidationError):
ibis.struct(value, type="struct<field1: string, field2: float64>")


Expand Down
Loading