Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ opendal = { version = "0.54.0", features = ["services-http"] }
tokio = { version = "1.41.1", features = ["rt-multi-thread"] }
zarrs_opendal = "0.9.0"
itertools = "0.14.0"
pyo3-object_store = "0.7.0" # object_store 0.12
zarrs_object_store = "0.5.0" # object_store 0.12

[profile.release]
lto = true
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class b

At the moment, we only support a subset of the `zarr-python` stores:

- [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
- [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
- [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
- [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
- [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
- [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)

A `NotImplementedError` will be raised if a store is not supported.
We intend to support more stores in the future: https://github.com/zarrs/zarrs-python/issues/44.

### Configuration

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ test = [
"aiohttp",
"fsspec>2024",
"numcodecs>=0.16.1",
"obstore>=0.8.2",
"pytest",
"pytest-asyncio",
"pytest-xdist",
Expand Down
13 changes: 13 additions & 0 deletions src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use pyo3::{
exceptions::{PyNotImplementedError, PyValueError},
types::{PyAnyMethods, PyStringMethods, PyTypeMethods},
};
use pyo3_object_store::PyExternalObjectStore;
use zarrs::storage::{
ReadableWritableListableStorage, storage_adapter::async_to_sync::AsyncToSyncStorageAdapter,
};
Expand All @@ -14,14 +15,17 @@ use crate::{runtime::tokio_block_on, utils::PyErrExt};

mod filesystem;
mod http;
mod obstore;

pub use self::filesystem::FilesystemStoreConfig;
pub use self::http::HttpStoreConfig;
pub use self::obstore::ObStoreConfig;

#[derive(Debug, Clone)]
pub enum StoreConfig {
Filesystem(FilesystemStoreConfig),
Http(HttpStoreConfig),
ObStore(ObStoreConfig),
// TODO: Add support for more stores
}

Expand Down Expand Up @@ -53,6 +57,13 @@ impl<'py> FromPyObject<'_, 'py> for StoreConfig {
))),
}
}
"ObjectStore" => {
let underlying_store = store.getattr("store")?;
let external_object_store: PyExternalObjectStore = underlying_store.extract()?;
let object_store: Arc<dyn zarrs_object_store::object_store::ObjectStore> =
external_object_store.into_dyn();
Comment on lines +63 to +64

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear here, what this is doing is taking the configuration from an existing obstore store and recreating a new store with the same parameters in the currently-linked object_store crate. So that means any connection pooling will not be shared between the user's input obstore instance and the instance you use to make your own fetches.

That's why the original idea in pyo3-object_store was that downstream crates would re-export the obstore store creation API, so that there wouldn't be any of this "dynamic linking" across crates. Since we don't have real dynamic linking, in effect we have to re-create all the necessary resources in the target pyo3 module.

Ok(StoreConfig::ObStore(ObStoreConfig::new(object_store)))
}
_ => Err(PyErr::new::<PyNotImplementedError, _>(format!(
"zarrs-python does not support {name} stores"
))),
Expand All @@ -65,6 +76,7 @@ impl StoreConfig {
match self {
StoreConfig::Filesystem(config) => config.direct_io(flag),
StoreConfig::Http(_config) => (),
StoreConfig::ObStore(_config) => (),
}
}
}
Expand All @@ -82,6 +94,7 @@ impl TryFrom<&StoreConfig> for ReadableWritableListableStorage {
match value {
StoreConfig::Filesystem(config) => config.try_into(),
StoreConfig::Http(config) => config.try_into(),
StoreConfig::ObStore(config) => config.try_into(),
}
}
}
Expand Down
33 changes: 33 additions & 0 deletions src/store/obstore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use std::sync::Arc;

use pyo3::PyErr;
use zarrs::storage::{
ReadableWritableListableStorage, storage_adapter::async_to_sync::AsyncToSyncStorageAdapter,
};
use zarrs_object_store::{AsyncObjectStore, object_store::ObjectStore};

use crate::runtime::tokio_block_on;

#[derive(Debug, Clone)]
pub struct ObStoreConfig {
store: Arc<dyn ObjectStore>,
}

impl ObStoreConfig {
pub fn new(store: Arc<dyn ObjectStore>) -> Self {
Self { store }
}
}

impl TryInto<ReadableWritableListableStorage> for &ObStoreConfig {
type Error = PyErr;

fn try_into(self) -> Result<ReadableWritableListableStorage, Self::Error> {
let async_store = Arc::new(AsyncObjectStore::new(self.store.clone()));
let sync_store = Arc::new(AsyncToSyncStorageAdapter::new(
async_store,
tokio_block_on(),
));
Ok(sync_store)
}
}
59 changes: 59 additions & 0 deletions tests/test_obstore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import tempfile
import warnings

import numpy as np
import zarr
from obstore.store import HTTPStore, LocalStore
from zarr.storage import ObjectStore

from .test_zarrs_http import ARR_REF, URL

# Suppress the expected warning about cross-library object store usage
warnings.filterwarnings(
"ignore",
message="Successfully reconstructed a store defined in another Python module",
)


def test_obstore_local_store():
"""Test zarrs-python with obstore LocalStore"""
with tempfile.TemporaryDirectory() as tmpdir:
# Create obstore LocalStore and wrap with zarr's ObjectStore
obstore_local = LocalStore(prefix=tmpdir)
store = ObjectStore(obstore_local, read_only=False)

# Create zarr array with obstore
# Expect a warning about cross-library object store usage
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
arr = zarr.open_array(
store=store,
mode="w",
shape=(100, 100),
chunks=(10, 10),
dtype="f4",
)

# Write data
data = np.random.rand(100, 100).astype("f4")
arr[:] = data

# Read back and verify
arr2 = zarr.open_array(store=store, mode="r")
assert arr2.shape == (100, 100)
assert np.allclose(arr2[:], data)


def test_obstore_http():
"""Test zarrs-python with obstore HTTPStore - similar to test_zarrs_http"""
# Create HTTPStore from the test URL
http_store = HTTPStore.from_url(URL)
store = ObjectStore(http_store, read_only=True)

with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
arr = zarr.open_array(store=store, mode="r")

# Verify shape and data match the reference
assert arr.shape == (8, 8)
assert np.allclose(arr[:], ARR_REF, equal_nan=True)
2 changes: 1 addition & 1 deletion tests/test_zarrs_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
]
)

URL = "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array"
URL = "https://raw.githubusercontent.com/zarrs/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array"


def test_zarrs_http():
Expand Down