diff --git a/Cargo.toml b/Cargo.toml index f3afd94..dd3341f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ opendal = { version = "0.54.0", features = ["services-http"] } tokio = { version = "1.41.1", features = ["rt-multi-thread"] } zarrs_opendal = "0.9.0" itertools = "0.14.0" +pyo3-object_store = "0.7.0" # object_store 0.12 +zarrs_object_store = "0.5.0" # object_store 0.12 [profile.release] lto = true diff --git a/README.md b/README.md index 4a206f3..f06add7 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,11 @@ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class b At the moment, we only support a subset of the `zarr-python` stores: -- [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem) -- [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore) - - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) +- [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem) +- [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage) +- [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore) A `NotImplementedError` will be raised if a store is not supported. -We intend to support more stores in the future: https://github.com/zarrs/zarrs-python/issues/44. ### Configuration diff --git a/pyproject.toml b/pyproject.toml index ef8f4eb..b1cc006 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ test = [ "aiohttp", "fsspec>2024", "numcodecs>=0.16.1", + "obstore>=0.8.2", "pytest", "pytest-asyncio", "pytest-xdist", diff --git a/src/store.rs b/src/store.rs index f15fa50..132b436 100644 --- a/src/store.rs +++ b/src/store.rs @@ -6,6 +6,7 @@ use pyo3::{ exceptions::{PyNotImplementedError, PyValueError}, types::{PyAnyMethods, PyStringMethods, PyTypeMethods}, }; +use pyo3_object_store::PyExternalObjectStore; use zarrs::storage::{ ReadableWritableListableStorage, storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, }; @@ -14,14 +15,17 @@ use crate::{runtime::tokio_block_on, utils::PyErrExt}; mod filesystem; mod http; +mod obstore; pub use self::filesystem::FilesystemStoreConfig; pub use self::http::HttpStoreConfig; +pub use self::obstore::ObStoreConfig; #[derive(Debug, Clone)] pub enum StoreConfig { Filesystem(FilesystemStoreConfig), Http(HttpStoreConfig), + ObStore(ObStoreConfig), // TODO: Add support for more stores } @@ -53,6 +57,13 @@ impl<'py> FromPyObject<'_, 'py> for StoreConfig { ))), } } + "ObjectStore" => { + let underlying_store = store.getattr("store")?; + let external_object_store: PyExternalObjectStore = underlying_store.extract()?; + let object_store: Arc = + external_object_store.into_dyn(); + Ok(StoreConfig::ObStore(ObStoreConfig::new(object_store))) + } _ => Err(PyErr::new::(format!( "zarrs-python does not support {name} stores" ))), @@ -65,6 +76,7 @@ impl StoreConfig { match self { StoreConfig::Filesystem(config) => config.direct_io(flag), StoreConfig::Http(_config) => (), + StoreConfig::ObStore(_config) => (), } } } @@ -82,6 +94,7 @@ impl TryFrom<&StoreConfig> for ReadableWritableListableStorage { match value { StoreConfig::Filesystem(config) => config.try_into(), StoreConfig::Http(config) => config.try_into(), + StoreConfig::ObStore(config) => config.try_into(), } } } diff --git a/src/store/obstore.rs b/src/store/obstore.rs new file mode 100644 index 0000000..029b187 --- /dev/null +++ b/src/store/obstore.rs @@ -0,0 +1,33 @@ +use std::sync::Arc; + +use pyo3::PyErr; +use zarrs::storage::{ + ReadableWritableListableStorage, storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, +}; +use zarrs_object_store::{AsyncObjectStore, object_store::ObjectStore}; + +use crate::runtime::tokio_block_on; + +#[derive(Debug, Clone)] +pub struct ObStoreConfig { + store: Arc, +} + +impl ObStoreConfig { + pub fn new(store: Arc) -> Self { + Self { store } + } +} + +impl TryInto for &ObStoreConfig { + type Error = PyErr; + + fn try_into(self) -> Result { + let async_store = Arc::new(AsyncObjectStore::new(self.store.clone())); + let sync_store = Arc::new(AsyncToSyncStorageAdapter::new( + async_store, + tokio_block_on(), + )); + Ok(sync_store) + } +} diff --git a/tests/test_obstore.py b/tests/test_obstore.py new file mode 100644 index 0000000..3541688 --- /dev/null +++ b/tests/test_obstore.py @@ -0,0 +1,59 @@ +import tempfile +import warnings + +import numpy as np +import zarr +from obstore.store import HTTPStore, LocalStore +from zarr.storage import ObjectStore + +from .test_zarrs_http import ARR_REF, URL + +# Suppress the expected warning about cross-library object store usage +warnings.filterwarnings( + "ignore", + message="Successfully reconstructed a store defined in another Python module", +) + + +def test_obstore_local_store(): + """Test zarrs-python with obstore LocalStore""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create obstore LocalStore and wrap with zarr's ObjectStore + obstore_local = LocalStore(prefix=tmpdir) + store = ObjectStore(obstore_local, read_only=False) + + # Create zarr array with obstore + # Expect a warning about cross-library object store usage + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + arr = zarr.open_array( + store=store, + mode="w", + shape=(100, 100), + chunks=(10, 10), + dtype="f4", + ) + + # Write data + data = np.random.rand(100, 100).astype("f4") + arr[:] = data + + # Read back and verify + arr2 = zarr.open_array(store=store, mode="r") + assert arr2.shape == (100, 100) + assert np.allclose(arr2[:], data) + + +def test_obstore_http(): + """Test zarrs-python with obstore HTTPStore - similar to test_zarrs_http""" + # Create HTTPStore from the test URL + http_store = HTTPStore.from_url(URL) + store = ObjectStore(http_store, read_only=True) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + arr = zarr.open_array(store=store, mode="r") + + # Verify shape and data match the reference + assert arr.shape == (8, 8) + assert np.allclose(arr[:], ARR_REF, equal_nan=True) diff --git a/tests/test_zarrs_http.py b/tests/test_zarrs_http.py index 3884e59..661f851 100644 --- a/tests/test_zarrs_http.py +++ b/tests/test_zarrs_http.py @@ -19,7 +19,7 @@ ] ) -URL = "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" +URL = "https://raw.githubusercontent.com/zarrs/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" def test_zarrs_http():