Skip to content

Commit 18e63a6

Browse files
committed
Move the exception for na_object-bearing string dtypes into from_native_dtype
1 parent db792f6 commit 18e63a6

File tree

3 files changed

+43
-7
lines changed

3 files changed

+43
-7
lines changed

src/zarr/core/dtype/npy/string.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,43 @@ class VariableLengthUTF8(UTF8Base[np.dtypes.StringDType]): # type: ignore[type-
742742

743743
dtype_cls = np.dtypes.StringDType
744744

745+
@classmethod
def from_native_dtype(cls, dtype: TBaseDType) -> Self:
    """
    Construct this Zarr data type from a native NumPy data type.

    A NumPy StringDType that carries an `na_object` attribute is refused,
    since the Zarr `string` data type has no way to represent it.

    Parameters
    ----------
    dtype : TBaseDType
        The native data type to convert.

    Returns
    -------
    Self
        A new instance of this data type.

    Raises
    ------
    DataTypeValidationError
        If `dtype` does not pass this class's native-dtype check.
    ValueError
        If `dtype` passes the check but exposes an `na_object` attribute.
    """
    # Guard 1: a dtype that is not ours at all. This is the "no match" signal,
    # kept distinct from the ValueError below, which means "matched but unsupported".
    if not cls._check_native_dtype(dtype):
        raise DataTypeValidationError(
            f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}"
        )

    # Guard 2: the dtype is the right kind, but has `na_object` configured.
    if hasattr(dtype, "na_object"):
        raise ValueError(
            f"Zarr data type resolution from {dtype} failed. "
            "Attempted to resolve a zarr data type from a `numpy.dtypes.StringDType` "
            "with `na_object` set, which is not supported."
        )

    return cls()
781+
745782
def to_native_dtype(self) -> np.dtypes.StringDType:
746783
"""
747784
Create a NumPy string dtype from this VariableLengthUTF8 ZDType.

src/zarr/core/dtype/registry.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -159,15 +159,12 @@ def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]:
159159
"data type, see https://github.com/zarr-developers/zarr-python/issues/3117"
160160
)
161161
raise ValueError(msg)
162-
if dtype.kind == "T" and hasattr(dtype, "na_object"):
163-
msg = (
164-
f"Zarr data type resolution from {dtype} failed. "
165-
"Attempted to resolve a zarr data type from a `numpy.dtypes.StringDType` "
166-
"with `na_object` set, which is not supported."
167-
)
168-
raise ValueError(msg)
169162
matched: list[ZDType[TBaseDType, TBaseScalar]] = []
170163
for val in self.contents.values():
164+
# DataTypeValidationError means "this dtype doesn't match me", which is
165+
# expected and suppressed. Other exceptions (e.g. ValueError for a dtype
166+
# that matches the type but has an invalid configuration) are propagated
167+
# to the caller.
171168
with contextlib.suppress(DataTypeValidationError):
172169
matched.append(val.from_native_dtype(dtype))
173170
if len(matched) == 1:

tests/test_dtype_registry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from zarr.dtype import ( # type: ignore[attr-defined]
2020
Bool,
2121
FixedLengthUTF32,
22+
VariableLengthUTF8,
2223
ZDType,
2324
data_type_registry,
2425
parse_data_type,
@@ -80,6 +81,7 @@ def test_match_dtype(
8081
def test_match_dtype_string_na_object_error(
8182
data_type_registry_fixture: DataTypeRegistry,
8283
) -> None:
84+
data_type_registry_fixture.register(VariableLengthUTF8._zarr_v3_name, VariableLengthUTF8) # type: ignore[arg-type]
8385
dtype: np.dtype[Any] = np.dtypes.StringDType(na_object=None) # type: ignore[call-arg]
8486
with pytest.raises(ValueError, match=r"Zarr data type resolution from StringDType.*failed"):
8587
data_type_registry_fixture.match_dtype(dtype)

0 commit comments

Comments
 (0)