From a3a080ab8d94f4a0ba81b18348c2aa000f3524c6 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:49 +0000 Subject: [PATCH 01/11] Add middleware to app --- src/labthings_fastapi/server/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/labthings_fastapi/server/__init__.py b/src/labthings_fastapi/server/__init__.py index eb7434f..716a791 100644 --- a/src/labthings_fastapi/server/__init__.py +++ b/src/labthings_fastapi/server/__init__.py @@ -18,6 +18,7 @@ from collections.abc import Mapping, Sequence from types import MappingProxyType +from ..middleware.url_for import url_for_middleware from ..thing_slots import ThingSlot from ..utilities import class_attributes @@ -86,6 +87,7 @@ def __init__( self._config = ThingServerConfig(things=things, settings_folder=settings_folder) self.app = FastAPI(lifespan=self.lifespan) self._set_cors_middleware() + self._set_url_for_middleware() self.settings_folder = settings_folder or "./settings" self.action_manager = ActionManager() self.action_manager.attach_to_app(self.app) @@ -129,6 +131,15 @@ def _set_cors_middleware(self) -> None: allow_headers=["*"], ) + def _set_url_for_middleware(self) -> None: + """Add middleware to support `url_for` in Pydantic models. + + This middleware adds a request state variable that allows + `labthings_fastapi.server.URLFor` instances to be serialised + using FastAPI's `url_for` function. + """ + self.app.middleware("http")(url_for_middleware) + @property def things(self) -> Mapping[str, Thing]: """Return a dictionary of all the things. From c9bc8cacc3b40f1d3a25adec0d2f517161ee4f13 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:49 +0000 Subject: [PATCH 02/11] Eliminate BlobManagerContextDept This commit makes use of the new `url_for` middleware to eliminate the Blob-specific context variables. BlobData objects are now added to a singleton BlobManager when they are created, and the URL is filled in at serialisation time. 
This is a slight simplification of the old behaviour, but it's equivalent in all the ways that matter. --- src/labthings_fastapi/actions.py | 39 +---- src/labthings_fastapi/outputs/blob.py | 222 ++++++++------------------ tests/test_blob_output.py | 20 +-- 3 files changed, 81 insertions(+), 200 deletions(-) diff --git a/src/labthings_fastapi/actions.py b/src/labthings_fastapi/actions.py index 95b9910..df3b449 100644 --- a/src/labthings_fastapi/actions.py +++ b/src/labthings_fastapi/actions.py @@ -47,10 +47,8 @@ from .exceptions import ( InvocationCancelledError, InvocationError, - NoBlobManagerError, NotConnectedToServerError, ) -from .outputs.blob import BlobIOContextDep, blobdata_to_url_ctx from . import invocation_contexts from .utilities.introspection import ( EmptyInput, @@ -149,23 +147,7 @@ def id(self) -> uuid.UUID: @property def output(self) -> Any: - """Return value of the Action. If the Action is still running, returns None. - - :raise NoBlobManagerError: If this is called in a context where the blob - manager context variables are not available. This stops errors being raised - later once the blob is returned and tries to serialise. If the errors - happen during serialisation the stack-trace will not clearly identify - the route with the missing dependency. - """ - try: - blobdata_to_url_ctx.get() - except LookupError as e: - raise NoBlobManagerError( - "An invocation output has been requested from a api route that " - "doesn't have a BlobIOContextDep dependency. This dependency is needed " - " for blobs to identify their url." - ) from e - + """Return value of the Action. 
If the Action is still running, returns None.""" with self._status_lock: return self._return_value @@ -470,25 +452,19 @@ def attach_to_app(self, app: FastAPI) -> None: """ @app.get(ACTION_INVOCATIONS_PATH, response_model=list[InvocationModel]) - def list_all_invocations( - request: Request, _blob_manager: BlobIOContextDep - ) -> list[InvocationModel]: + def list_all_invocations(request: Request) -> list[InvocationModel]: return self.list_invocations(request=request) @app.get( ACTION_INVOCATIONS_PATH + "/{id}", responses={404: {"description": "Invocation ID not found"}}, ) - def action_invocation( - id: uuid.UUID, request: Request, _blob_manager: BlobIOContextDep - ) -> InvocationModel: + def action_invocation(id: uuid.UUID, request: Request) -> InvocationModel: """Return a description of a specific action. :param id: The action's ID (from the path). :param request: FastAPI dependency for the request object, used to find URLs via ``url_for``. - :param _blob_manager: FastAPI dependency that enables `.Blob` objects - to be serialised. :return: Details of the invocation. @@ -518,17 +494,13 @@ def action_invocation( 503: {"description": "No result is available for this invocation"}, }, ) - def action_invocation_output( - id: uuid.UUID, _blob_manager: BlobIOContextDep - ) -> Any: + def action_invocation_output(id: uuid.UUID) -> Any: """Get the output of an action invocation. This returns just the "output" component of the action invocation. If the output is a file, it will return the file. :param id: The action's ID (from the path). - :param _blob_manager: FastAPI dependency that enables `.Blob` objects - to be serialised. :return: The output of the invocation, as a `pydantic.BaseModel` instance. If this is a `.Blob`, it may be returned directly. @@ -806,7 +778,6 @@ def add_to_fastapi(self, app: FastAPI, thing: Thing) -> None: # The solution below is to manually add the annotation, before passing # the function to the decorator. 
def start_action( - _blob_manager: BlobIOContextDep, request: Request, body: Any, # This annotation will be overwritten below. id: NonWarningInvocationID, @@ -884,7 +855,7 @@ def start_action( ), summary=f"All invocations of {self.name}.", ) - def list_invocations(_blob_manager: BlobIOContextDep) -> list[InvocationModel]: + def list_invocations() -> list[InvocationModel]: action_manager = thing._thing_server_interface._action_manager return action_manager.list_invocations(self, thing) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index a873b04..38910ee 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -39,27 +39,22 @@ def get_image(self) -> MyImageBlob: """ from __future__ import annotations -from contextvars import ContextVar import io import os import re import shutil from typing import ( - Annotated, Any, - AsyncGenerator, - Callable, Literal, Mapping, Optional, ) from warnings import warn from weakref import WeakValueDictionary -from typing_extensions import TypeAlias from tempfile import TemporaryDirectory import uuid -from fastapi import FastAPI, Depends, Request +from fastapi import FastAPI from fastapi.responses import FileResponse, Response from pydantic import ( BaseModel, @@ -67,8 +62,8 @@ def get_image(self) -> MyImageBlob: model_serializer, model_validator, ) -from starlette.exceptions import HTTPException from typing_extensions import Self, Protocol, runtime_checkable +from labthings_fastapi.middleware.url_for import url_for @runtime_checkable @@ -87,13 +82,8 @@ class BlobData(Protocol): which adds a `response()` method and `id` property. """ - @property - def media_type(self) -> str: - """The MIME type of the data, e.g. 'image/png' or 'application/json'. - - :raises NotImplementedError: always, as this must be implemented by subclasses. 
- """ - raise NotImplementedError("media_type property must be implemented.") + media_type: str + """The MIME type of the data, e.g. 'image/png' or 'application/json'.""" @property def content(self) -> bytes: @@ -129,12 +119,25 @@ class ServerSideBlobData(BlobData, Protocol): See `.BlobBytes` or `.BlobFile` for concrete implementations. """ - id: Optional[uuid.UUID] = None - """A unique identifier for this BlobData object. + def __init__(self, media_type: str) -> None: + """Initialise the ServerSideBlobData object. - The ID is set when the BlobData object is added to the BlobDataManager. - It is used to retrieve the BlobData object from the manager. - """ + :param media_type: the MIME type of the data. + """ + self._id = blob_data_manager.add_blob(self) + self.media_type = media_type + + _id: uuid.UUID + media_type: str + + @property + def id(self) -> uuid.UUID: + """A unique identifier for this BlobData object. + + The ID is set when the BlobData object is added to the `BlobDataManager` + during initialisation. + """ + return self._id def response(self) -> Response: """Return a`fastapi.Response` object that sends binary data. @@ -144,7 +147,7 @@ def response(self) -> Response: ... # pragma: no cover -class BlobBytes: +class BlobBytes(ServerSideBlobData): """A `.Blob` that holds its data in memory as a `bytes` object. `.Blob` objects use objects conforming to the `.BlobData` protocol to @@ -157,8 +160,7 @@ class BlobBytes: `.Blob.from_bytes` on a `.Blob` subclass. """ - id: Optional[uuid.UUID] = None - """A unique ID to identify the data in a `.BlobManager`.""" + _id: uuid.UUID def __init__(self, data: bytes, media_type: str) -> None: """Create a `.BlobBytes` object. @@ -169,8 +171,8 @@ def __init__(self, data: bytes, media_type: str) -> None: :param data: is the data to be wrapped. :param media_type: is the MIME type of the data. 
""" + super().__init__(media_type=media_type) self._bytes = data - self.media_type = media_type @property def content(self) -> bytes: @@ -202,7 +204,7 @@ def response(self) -> Response: return Response(content=self._bytes, media_type=self.media_type) -class BlobFile: +class BlobFile(ServerSideBlobData): """A `.Blob` that holds its data in a file. `.Blob` objects use objects conforming to the `.BlobData` protocol to @@ -219,8 +221,7 @@ class BlobFile: `.Blob.from_temporary_directory` on a `.Blob` subclass. """ - id: Optional[uuid.UUID] = None - """A unique ID to identify the data in a `.BlobManager`.""" + _id: uuid.UUID def __init__(self, file_path: str, media_type: str, **kwargs: Any) -> None: r"""Create a `.BlobFile` to wrap data stored on disk. @@ -237,6 +238,7 @@ def __init__(self, file_path: str, media_type: str, **kwargs: Any) -> None: :raise IOError: if the file specified does not exist. """ + super().__init__(media_type=media_type) if not os.path.exists(file_path): raise IOError("Tried to return a file that doesn't exist.") self._file_path = file_path @@ -347,69 +349,47 @@ def retrieve_data(self) -> Self: `.Blob` that already exists on this server, and any other URL will cause a `LookupError`. - This validator will only work if the function to resolve URLs to - `.BlobData` objects - has been set in the context variable `.blob.url_to_blobdata_ctx`\ . - This is done when actions are being invoked over HTTP by the - `.BlobIOContextDep` dependency. - :return: the `.Blob` object (i.e. ``self``), after retrieving the data. :raise ValueError: if the ``href`` is set as ``"blob://local"`` but the ``_data`` attribute has not been set. This happens when the `.Blob` is being constructed using `.Blob.from_bytes` or similar. - :raise LookupError: if the `.Blob` is being constructed from a URL - and the URL does not correspond to a `.BlobData` instance that - exists on this server (i.e. 
one that has been previously created - and added to the `.BlobManager` as the result of a previous action). """ if self.href == "blob://local": if self._data: return self raise ValueError("Blob objects must have data if the href is blob://local") + id = url_to_id(self.href) + if not id: + raise ValueError("Blob URLs must contain a Blob ID.") try: - url_to_blobdata = url_to_blobdata_ctx.get() - self._data = url_to_blobdata(self.href) + self._data = blob_data_manager.get_blob(id) self.href = "blob://local" - except LookupError as e: - raise LookupError( - "Blobs may only be created from URLs passed in over HTTP." - f"The URL in question was {self.href}." - ) from e - return self + return self + except KeyError as error: + raise ValueError(f"Blob ID {id} wasn't found on this server.") from error @model_serializer(mode="plain", when_used="always") def to_dict(self) -> Mapping[str, str]: r"""Serialise the Blob to a dictionary and make it downloadable. When `pydantic` serialises this object, - it will call this method to convert it to a dictionary. There is a - significant side-effect, which is that we will add the blob to the - `.BlobDataManager` so it can be downloaded. - - This serialiser will only work if the function to assign URLs to - `.BlobData` objects has been set in the context variable - `.blobdata_to_url_ctx`\ . - This is done when actions are being returned over HTTP by the - `.BlobIOContextDep` dependency. + it will call this method to convert it to a dictionary. We use + `.url_for.url_for` to generate the URL, so this will error if + it is serialised anywhere other than a request handler with the + middleware from `.middleware.url_for` enabled. :return: a JSON-serialisable dictionary with a URL that allows the `.Blob` to be downloaded from the `.BlobManager`. - - :raise LookupError: if the context variable providing access to the - `.BlobManager` is not available.
This usually means the `.Blob` is - being serialised somewhere other than the output of an action. + :raises TypeError: if the blob data ID is missing. This should + never happen, and if it does it's probably a bug in the + `.BlobData` class. """ if self.href == "blob://local": - try: - blobdata_to_url = blobdata_to_url_ctx.get() - # MyPy seems to miss that `self.data` is a property, hence the ignore - href = blobdata_to_url(self.data) # type: ignore[arg-type] - except LookupError as e: - raise LookupError( - "Blobs may only be serialised inside the " - "context created by BlobIOContextDep." - ) from e + id = getattr(self._data, "id", None) + if not isinstance(id, uuid.UUID): + raise TypeError("A BlobData id is missing. This is a LabThings Bug.") + href = str(url_for("download_blob", blob_id=id)) else: href = self.href return { @@ -653,17 +633,14 @@ def add_blob(self, blob: ServerSideBlobData) -> uuid.UUID: data. This suggests the object has been added to another `.BlobDataManager`, which should never happen. """ - if hasattr(blob, "id") and blob.id is not None: - if blob.id in self._blobs: - return blob.id - else: - raise ValueError( - f"BlobData already has an ID {blob.id} " - "but was not found in this BlobDataManager" - ) - blob.id = uuid.uuid4() - self._blobs[blob.id] = blob - return blob.id + if blob in self._blobs.values(): + raise ValueError( + "BlobData objects may only be added to the manager once! " + "This is a LabThings bug." + ) + id = uuid.uuid4() + self._blobs[id] = blob + return id def get_blob(self, blob_id: uuid.UUID) -> ServerSideBlobData: """Retrieve a `.Blob` from the manager. @@ -699,89 +676,26 @@ def attach_to_app(self, app: FastAPI) -> None: :param app: the `fastapi.FastAPI` application to which we are adding the endpoint. 
""" - app.get("/blob/{blob_id}")(self.download_blob) + app.get( + "/blob/{blob_id}", + name="download_blob", + )(self.download_blob) blob_data_manager = BlobDataManager() """A global register of all BlobData objects.""" -blobdata_to_url_ctx = ContextVar[Callable[[ServerSideBlobData], str]]("blobdata_to_url") -"""This context variable gives access to a function that makes BlobData objects -downloadable, by assigning a URL and adding them to the -[`BlobDataManager`](#labthings_fastapi.outputs.blob.BlobDataManager). - -It is only available within a -[`blob_serialisation_context_manager`](#labthings_fastapi.outputs.blob.blob_serialisation_context_manager) -because it requires access to the `BlobDataManager` and the `url_for` function -from the FastAPI app. -""" - -url_to_blobdata_ctx = ContextVar[Callable[[str], ServerSideBlobData]]("url_to_blobdata") -"""This context variable gives access to a function that makes BlobData objects -from a URL, by retrieving them from the -[`BlobDataManager`](#labthings_fastapi.outputs.blob.BlobDataManager). - -It is only available within a -[`blob_serialisation_context_manager`](#labthings_fastapi.outputs.blob.blob_serialisation_context_manager) -because it requires access to the `BlobDataManager`. -""" - - -async def blob_serialisation_context_manager( - request: Request, -) -> AsyncGenerator[BlobDataManager, None]: - r"""Set context variables to allow blobs to be [de]serialised. +def url_to_id(url: str) -> uuid.UUID | None: + """Extract the blob ID from a URL. - In order to serialise a `.Blob` to a JSON-serialisable dictionary, we must - add it to the `.BlobDataManager` and use that to generate a URL. This - requires that the serialisation code (which may be nested deep within a - `pydantic.BaseModel`) has access to the `.BlobDataManager` and also the - `fastapi.Request.url_for` method. At time of writing, there was not an - obvious way to pass these functions in to the serialisation code. 
+ Currently, this checks for a UUID at the end of a URL. In the future, + it might check if the URL refers to this server. - Similar problems exist for blobs used as input: the validator needs to - retrieve the data from the `.BlobDataManager` but does not have access. - - This async context manager yields the `.BlobDataManager`, but more - importantly it sets the `.url_to_blobdata_ctx` and `blobdata_to_url_ctx` - context variables, which may be accessed by the code within `.Blob` to - correctly add and retrieve `.ServerSideBlobData` objects to and from the - `.BlobDataManager`\ . - - This function will usually be called from a FastAPI dependency. See - :ref:`dependencies` for more on that mechanism. - - :param request: the `fastapi.Request` object, used to access the server - and ``url_for`` method. - - :yield: the `.BlobDataManager`. This is usually ignored. + :param url: a blob download URL, as generated when a `.Blob` is serialised. + :return: the UUID blob ID extracted from the URL, or `None` if there is none. """ - url_for = request.url_for - - def blobdata_to_url(blob: ServerSideBlobData) -> str: - blob_id = blob_data_manager.add_blob(blob) - return str(url_for("download_blob", blob_id=blob_id)) - - def url_to_blobdata(url: str) -> ServerSideBlobData: - m = re.search(r"blob/([0-9a-z\-]+)", url) - if not m: - raise HTTPException( - status_code=404, detail="Could not find blob ID in href" - ) - invocation_id = uuid.UUID(m.group(1)) - return blob_data_manager.get_blob(invocation_id) - - t1 = blobdata_to_url_ctx.set(blobdata_to_url) - t2 = url_to_blobdata_ctx.set(url_to_blobdata) - try: - yield blob_data_manager - finally: - blobdata_to_url_ctx.reset(t1) - url_to_blobdata_ctx.reset(t2) - - -BlobIOContextDep: TypeAlias = Annotated[ - BlobDataManager, Depends(blob_serialisation_context_manager) -] -"""A dependency that enables `.Blob` to be serialised and deserialised.""" + m = re.search(r"blob/([0-9a-z\-]+)", url) + if not m: + return None + return uuid.UUID(m.group(1)) diff --git
a/tests/test_blob_output.py b/tests/test_blob_output.py index 8a15562..3b04f67 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -11,7 +11,8 @@ import pytest import labthings_fastapi as lt from labthings_fastapi.client.outputs import ClientBlobOutput -from labthings_fastapi.testing import create_thing_without_server +from labthings_fastapi.exceptions import FailedToInvokeActionError +from labthings_fastapi.testing import create_thing_without_server, use_dummy_url_for class TextBlob(lt.blob.Blob): @@ -93,8 +94,6 @@ class BadBlob(lt.blob.BlobData): assert not isinstance(bad_blob, lt.blob.ServerSideBlobData) - with pytest.raises(NotImplementedError): - _ = bad_blob.media_type with pytest.raises(NotImplementedError): _ = bad_blob.content @@ -142,13 +141,10 @@ def test_blob_serialisation(): with pytest.raises(PydanticSerializationError): blob.model_dump() # Fake the required context variable, and it should work - try: - token = lt.outputs.blob.blobdata_to_url_ctx.set(lambda b: "https://example/") + with use_dummy_url_for(): data = blob.model_dump() - assert data["href"] == "https://example/" - assert data["media_type"] == "text/plain" - finally: - lt.outputs.blob.blobdata_to_url_ctx.reset(token) + assert data["href"].startswith("urlfor://download_blob/?blob_id=") + assert data["media_type"] == "text/plain" # Blobs that already refer to a remote URL should serialise without error # though there's currently no way to create one on the server. 
@@ -222,15 +218,15 @@ def test_blob_input(client): bad_blob = ClientBlobOutput( media_type="text/plain", href=f"http://nonexistent.local/blob/{uuid4()}" ) - with pytest.raises(LookupError): + with pytest.raises(FailedToInvokeActionError, match="wasn't found"): tc.passthrough_blob(blob=bad_blob) # Try again with a totally bogus URL bad_blob = ClientBlobOutput( media_type="text/plain", href="http://nonexistent.local/totally_bogus" ) - msg = "Error when invoking action passthrough_blob: Could not find blob ID in href" - with pytest.raises(lt.exceptions.FailedToInvokeActionError, match=msg): + msg = "must contain a Blob ID" + with pytest.raises(FailedToInvokeActionError, match=msg): tc.passthrough_blob(blob=bad_blob) # Check that the same thing works on the server side From 0c4a4d9263fb6fdc39d50729b3882e958250c5e4 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:49 +0000 Subject: [PATCH 03/11] Major tidy up of Blob Having now learned more about custom types in pydantic, I've done some more tidying here: * Blob is no longer a BaseModel subclass. I've separated out the model (used for serialisation/validation) and the class that user code will interact with. * BlobData is now a base class not a protocol, and there's a subclass for remote blob data that downloads on demand. This removes most of the complicated logic from `Blob` around when we do and don't need a `BlobData`: a `Blob` is **always** backed by `BlobData` whether it's local or remote. This also means we can get rid of `ClientBlobOutput` and just use `Blob` instead. 
--- src/labthings_fastapi/outputs/blob.py | 457 +++++++++++++++++--------- tests/test_blob_output.py | 34 +- 2 files changed, 310 insertions(+), 181 deletions(-) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 38910ee..3f6c48f 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -39,6 +39,7 @@ def get_image(self) -> MyImageBlob: """ from __future__ import annotations +from collections.abc import Callable import io import os import re @@ -47,7 +48,6 @@ def get_image(self) -> MyImageBlob: Any, Literal, Mapping, - Optional, ) from warnings import warn from weakref import WeakValueDictionary @@ -56,34 +56,52 @@ def get_image(self) -> MyImageBlob: from fastapi import FastAPI from fastapi.responses import FileResponse, Response +import httpx from pydantic import ( BaseModel, - create_model, - model_serializer, - model_validator, + GetCoreSchemaHandler, ) -from typing_extensions import Self, Protocol, runtime_checkable +from pydantic_core import core_schema +from typing_extensions import Self from labthings_fastapi.middleware.url_for import url_for -@runtime_checkable -class BlobData(Protocol): - """The interface for the data store of a Blob. +class BlobData: + """The data store of a Blob. `.Blob` objects can represent their data in various ways. Each of those options must provide three ways to access the data, which are the `content` property, the `save()` method, and the `open()` method. - This protocol defines the interface needed by any data store used by a + This base class defines the interface needed by any data store used by a `.Blob`. - Objects that are used on the server will additionally need to implement the - [`ServerSideBlobData`](#labthings_fastapi.outputs.blob.ServerSideBlobData) protocol, - which adds a `response()` method and `id` property. 
+ Blobs that store their data locally should subclass `.LocalBlobData` + which adds a `response()` method and `id` property, appropriate for data + that would need to be downloaded from a server. It also takes care of + generating a download URL when it's needed. """ - media_type: str - """The MIME type of the data, e.g. 'image/png' or 'application/json'.""" + def __init__(self, media_type: str) -> None: + """Initialise a `.BlobData` object. + + :param media_type: the MIME type of the data. + """ + self._media_type = media_type + + @property + def media_type(self) -> str: + """The MIME type of the data, e.g. 'image/png' or 'application/json'.""" + return self._media_type + + def get_href(self) -> str: + """Return the URL to download the blob. + + The implementation of this method for local blobs will need + `.url_for.url_for` and thus it should only be called in a response + handler when the `.middleware.url_for` middleware is enabled. + """ + raise NotImplementedError("get_href must be implemented.") @property def content(self) -> bytes: @@ -98,37 +116,98 @@ def save(self, filename: str) -> None: :param filename: the path where the file should be saved. """ - ... # pragma: no cover + raise NotImplementedError("save must be implemented.") def open(self) -> io.IOBase: """Return a file-like object that may be read from. :return: an open file-like object. """ - ... # pragma: no cover + raise NotImplementedError("open must be implemented.") + + +class RemoteBlobData(BlobData): + r"""A BlobData subclass that references remote data via a URL. + + This `.BlobData` implementation will download data lazily, and + provides it in the three ways defined by `.BlobData`\ . It + does not cache downloaded data: if the `.content` attribute is + accessed multiple times, the data will be downloaded again each + time. + + .. note:: + + This class is rarely instantiated directly. It is usually best to use + `.Blob.from_url` on a `.Blob` subclass.
+ """ + + def __init__( + self, media_type: str, href: str, client: httpx.Client | None = None + ) -> None: + """Create a reference to remote `.Blob` data. + + :param media_type: the MIME type of the data. + :param href: the URL where it may be downloaded. + :param client: if supplied, this `httpx.Client` will be used to + download the data. + """ + super().__init__(media_type=media_type) + self._href = href + self._client = client or httpx.Client() + + def get_href(self) -> str: + """Return the URL to download the data.""" + return self._href + + @property + def content(self) -> bytes: + """The binary data, as a `bytes` object.""" + return self._client.get(self._href).content + + def save(self, filepath: str) -> None: + """Save the output to a file. + + Note that the current implementation retrieves the data into + memory in its entirety, and saves to file afterwards. + + :param filepath: the file will be saved at this location. + """ + with open(filepath, "wb") as f: + f.write(self.content) + def open(self) -> io.IOBase: + """Open the output as a binary file-like object. + + Internally, this will download the file to memory, and wrap the + resulting `bytes` object in an `io.BytesIO` object to allow it to + function as a file-like object. -class ServerSideBlobData(BlobData, Protocol): - """A BlobData protocol for server-side use, i.e. including `response()`. + To work with the data on disk, use `save` instead. + + :return: a file-like object containing the downloaded data. + """ + return io.BytesIO(self.content) - `.Blob` objects returned by actions must use `.BlobData` objects - that can be downloaded. This protocol extends the `.BlobData` protocol to - include a `.ServerSideBlobData.response` method that returns a - `fastapi.Response` object. + +class LocalBlobData(BlobData): + """A BlobData subclass where the data is stored locally. + + `.Blob` objects can reference data by a URL, or can wrap data + held in memory or on disk. 
For the non-URL options, we need to register the + data with the `.BlobManager` and allow it to be downloaded. This class takes + care of registering with the `.BlobManager` and adds the `.response` method + that must be overridden by subclasses to allow downloading. See `.BlobBytes` or `.BlobFile` for concrete implementations. """ def __init__(self, media_type: str) -> None: - """Initialise the ServerSideBlobData object. + """Initialise the LocalBlobData object. :param media_type: the MIME type of the data. """ + super().__init__(media_type=media_type) self._id = blob_data_manager.add_blob(self) - self.media_type = media_type - - _id: uuid.UUID - media_type: str @property def id(self) -> uuid.UUID: @@ -139,15 +218,23 @@ def id(self) -> uuid.UUID: """ return self._id + def get_href(self) -> str: + r"""Return a URL where this data may be downloaded. + + Note that this should only be called in a response handler, as it + relies on `.url_for.url_for`\ . + """ + return str(url_for("download_blob", blob_id=self.id)) + def response(self) -> Response: """Return a`fastapi.Response` object that sends binary data. :return: a response that streams the data from disk or memory. """ - ... # pragma: no cover + raise NotImplementedError -class BlobBytes(ServerSideBlobData): +class BlobBytes(LocalBlobData): """A `.Blob` that holds its data in memory as a `bytes` object. `.Blob` objects use objects conforming to the `.BlobData` protocol to @@ -165,8 +252,10 @@ class BlobBytes(ServerSideBlobData): def __init__(self, data: bytes, media_type: str) -> None: """Create a `.BlobBytes` object. - `.BlobBytes` objects wrap data stored in memory as `bytes`. They - are not usually instantiated directly, but made using `.Blob.from_bytes`. + .. note:: + + This class is rarely instantiated directly. It is usually best to use + `.Blob.from_bytes` on a `.Blob` subclass. :param data: is the data to be wrapped. :param media_type: is the MIME type of the data. 
@@ -204,12 +293,8 @@ def response(self) -> Response: return Response(content=self._bytes, media_type=self.media_type) -class BlobFile(ServerSideBlobData): - """A `.Blob` that holds its data in a file. - - `.Blob` objects use objects conforming to the `.BlobData` protocol to - store their data either on disk or in a file. This implements the protocol - using a file on disk. +class BlobFile(LocalBlobData): + """A `.BlobData` backed by a file on disk. Only the filepath is retained by default. If you are using e.g. a temporary directory, you should add the `.TemporaryDirectory` as an instance attribute, @@ -218,11 +303,9 @@ class BlobFile(ServerSideBlobData): .. note:: This class is rarely instantiated directly. It is usually best to use - `.Blob.from_temporary_directory` on a `.Blob` subclass. + `.Blob.from_file` on a `.Blob` subclass. """ - _id: uuid.UUID - def __init__(self, file_path: str, media_type: str, **kwargs: Any) -> None: r"""Create a `.BlobFile` to wrap data stored on disk. @@ -242,7 +325,6 @@ def __init__(self, file_path: str, media_type: str, **kwargs: Any) -> None: if not os.path.exists(file_path): raise IOError("Tried to return a file that doesn't exist.") self._file_path = file_path - self.media_type = media_type for key, val in kwargs.items(): setattr(self, key, val) @@ -289,36 +371,16 @@ def response(self) -> Response: return FileResponse(self._file_path, media_type=self.media_type) -class Blob(BaseModel): - """A container for binary data that may be retrieved over HTTP. - - See :ref:`blobs` for more information on how to use this class. +class BlobModel(BaseModel): + """A model for JSON-serialised `.Blob` objects. - A `.Blob` may be created to hold data using the class methods - `.Blob.from_bytes`, `.Blob.from_file` or `.Blob.from_temporary_directory`. - The constructor will attempt to deserialise a Blob from a URL - (see `__init__` method) and is unlikely to be used except in code - internal to LabThings. 
- - You are strongly advised to use a subclass of this class that specifies the - `.Blob.media_type` attribute, as this will propagate to the auto-generated - documentation. + This model describes the JSON representation of a `.Blob` + and does not offer any useful functionality. """ href: str - """The URL where the data may be retrieved. - - `.Blob` objects on a `.ThingServer` are assigned a URL when they are - serialised to JSON. This allows them to be downloaded as binary data in a - separate HTTP request. - - `.Blob` objects created by a `.ThingClient` contain a URL pointing to the - data, which will be downloaded when it is required. - - `.Blob` objects that store their data in a file or in memory will have the - ``href`` attribute set to the special value `blob://local`. - """ - media_type: str = "*/*" + """The URL where the data may be retrieved.""" + media_type: str """The MIME type of the data. This should be overridden in subclasses.""" rel: Literal["output"] = "output" """The relation of this link to the host object. @@ -332,18 +394,95 @@ class Blob(BaseModel): ) """This description is added to the serialised `.Blob`.""" - _data: Optional[ServerSideBlobData] = None - """This object holds the data, either in memory or as a file. - If `_data` is `None`, then the Blob has not been deserialised yet, and the - `href` should point to a valid address where the data may be downloaded. +class Blob: + r"""A container for binary data that may be retrieved over HTTP. + + See :ref:`blobs` for more information on how to use this class. + + A `.Blob` may be created to hold data using the class methods + `.Blob.from_bytes`, `.Blob.from_file` or `.Blob.from_temporary_directory`\ . + It may also reference remote data, using `.Blob.from_url`\ . + The constructor requires a `.BlobData` instance, so the methods mentioned + previously are likely more convenient. 
+ + You are strongly advised to use a subclass of this class that specifies the + `.Blob.media_type` attribute, as this will propagate to the auto-generated + documentation. + + This class is `pydantic` compatible, in that it provides a schema, validator + and serialiser. However, it may use `.url_for.url_for` during serialisation, + so it should only be serialised in a request handler function. This + functionality is intended for use by LabThings library functions only. """ - @model_validator(mode="after") - def retrieve_data(self) -> Self: - r"""Retrieve the data from the URL. + media_type: str = "*/*" + """The MIME type of the data. This should be overridden in subclasses.""" + description: str | None = None + """An optional description that may be added to the serialised `.Blob`.""" + _data: BlobData + """This object stores the data - in memory, on disk, or at a URL.""" + + def __init__(self, data: BlobData, description: str | None = None) -> None: + """Create a `.Blob` object wrapping the given data. + + :param data: the `.BlobData` object that stores the data. + :param description: an optional description of the blob. + """ + super().__init__() + self._data = data + if description is not None: + self.description = description + + @classmethod + def __get_pydantic_core_schema__( + cls, source: type[Any], handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Get the pydantic core schema for this type. + + This magic method allows `pydantic` to serialise `.Blob` + instances, and generate a JSONSchema for them. + + The representation of a `.Blob` in JSON is described by + `.BlobModel` and includes the ``href`` and ``media_type`` properties + as well as a description. + + When a `.Blob` is serialised, we will generate a download URL that + matches the request to which we are responding. This means we may + only serialise a `.Blob` in the context of a request handler, and + it's required that the `.middleware.url_for` middleware is in use. 
+ + When a `.Blob` is validated, we will check to see if the URL given + as its ``href`` looks like a `.Blob` download URL on this server. If + it does, the returned object will hold a reference to the local data. + If we can't match the URL to a `.Blob` on this server, we will raise + an error. Handling of `.Blob` input is currently experimental, and + limited to passing the output of one Action as input to a subsequent + one. + + :param source: The source type being converted. + :param handler: The pydantic core schema handler. + :return: The pydantic core schema for the `.Blob` type. + """ + return core_schema.no_info_wrap_validator_function( + cls._validate, + BlobModel.__get_pydantic_core_schema__(BlobModel, handler), + serialization=core_schema.wrap_serializer_function_ser_schema( + cls._serialize, + is_field_serializer=False, + info_arg=False, + when_used="always", + ), + ) + + @classmethod + def _validate(cls, value: Any, handler: Callable[[Any], BlobModel]) -> Self: + r"""Validate and convert a value to a `.Blob` instance. - When a `.Blob` is created using its constructor, `pydantic` + :param value: The value to validate. + :param handler: The handler to convert the value if needed. + + When a `.Blob` is created from a dictionary, LabThings will attempt to deserialise it by retrieving the data from the URL specified in `.Blob.href`. Currently, this must be a URL pointing to a `.Blob` that already exists on this server, and any other URL will @@ -355,27 +494,39 @@ def retrieve_data(self) -> Self: the ``_data`` attribute has not been set. This happens when the `.Blob` is being constructed using `.Blob.from_bytes` or similar.
""" - if self.href == "blob://local": - if self._data: - return self - raise ValueError("Blob objects must have data if the href is blob://local") - id = url_to_id(self.href) + # If the value is already a Blob, return it directly + if isinstance(value, cls): + return value + # We start by validating the input, which should fit a `BlobModel` + # (this validator is wrapping the BlobModel schema) + model = handler(value) + id = url_to_id(model.href) if not id: raise ValueError("Blob URLs must contain a Blob ID.") try: - self._data = blob_data_manager.get_blob(id) - self.href = "blob://local" - return self + data = blob_data_manager.get_blob(id) + return cls(data) except KeyError as error: raise ValueError(f"Blob ID {id} wasn't found on this server.") from error - @model_serializer(mode="plain", when_used="always") - def to_dict(self) -> Mapping[str, str]: - r"""Serialise the Blob to a dictionary and make it downloadable. + @classmethod + def _serialize( + cls, obj: Self, handler: Callable[[BlobModel], Mapping[str, str]] + ) -> Mapping[str, str]: + """Serialise the Blob to a dictionary. + + :param obj: the `.Blob` instance to serialise. + :return: a JSON-serialisable dictionary with a URL that allows + the `.Blob` to be downloaded from the `.BlobManager`. + """ + return handler(obj.to_blobmodel()) + + def to_blobmodel(self) -> BlobModel: + r"""Represent the `.Blob` as a `.BlobModel` to get ready to serialise. - When `pydantic` serialises this object, - it will call this method to convert it to a dictionary. We use - `.from_url.from_url` to generate the URL, so this will error if + When `pydantic` serialises this object, we first generate a `.BlobModel` + with just the information to be serialised. + We use `.url_for.url_for` to generate the URL, so this will error if it is serialised anywhere other than a request handler with the middleware from `.middleware.url_for` enabled.
@@ -385,54 +536,24 @@ def to_dict(self) -> Mapping[str, str]: never happen, and if it does it's probably a bug in the `.BlobData` class. """ - if self.href == "blob://local": - id = getattr(self._data, "id", None) - if not isinstance(id, uuid.UUID): - raise TypeError("A BlobData id is missing. This is a LabThings Bug.") - href = str(url_for("download_blob", blob_id=id)) - else: - href = self.href - return { - "href": href, + data = { + "href": self.data.get_href(), "media_type": self.media_type, - "rel": self.rel, - "description": self.description, } - - @classmethod - def default_media_type(cls) -> str: - """Return the default media type. - - `.Blob` should generally be subclassed to define the default media type, - as this forms part of the auto-generated documentation. Using the - `.Blob` class directly will result in a media type of `*/*`, which makes - it unclear what format the output is in. - - :return: the default media type as a MIME type string, e.g. ``image/png``. - """ - return cls.model_fields["media_type"].get_default() + if self.description is not None: + data["description"] = self.description + return BlobModel(**data) @property - def data(self) -> ServerSideBlobData: + def data(self) -> BlobData: """The data store for this Blob. - `.Blob` objects may hold their data in various ways, defined by the - `.ServerSideBlobData` protocol. This property returns the data store - for this `.Blob`. - - If the `.Blob` has not yet been downloaded, there may be no data - held locally, in which case this function will raise an exception. - It is recommended to use the `.Blob.content` property or `.Blob.save` or `.Blob.open` methods rather than accessing this property directly. :return: the data store wrapping data on disk or in memory. - - :raise ValueError: if there is no data stored on disk or in memory. 
""" - if self._data is None: - raise ValueError("This Blob has no data.") return self._data @property @@ -484,10 +605,7 @@ def from_bytes(cls, data: bytes) -> Self: :return: a `.Blob` wrapping the supplied data. """ - return cls.model_construct( # type: ignore[return-value] - href="blob://local", - _data=BlobBytes(data, media_type=cls.default_media_type()), - ) + return cls(BlobBytes(data, media_type=cls.media_type)) @classmethod def from_temporary_directory(cls, folder: TemporaryDirectory, file: str) -> Self: @@ -509,11 +627,10 @@ def from_temporary_directory(cls, folder: TemporaryDirectory, file: str) -> Self :return: a `.Blob` wrapping the file. """ file_path = os.path.join(folder.name, file) - return cls.model_construct( # type: ignore[return-value] - href="blob://local", - _data=BlobFile( + return cls( + BlobFile( file_path, - media_type=cls.default_media_type(), + media_type=cls.media_type, # Prevent the temporary directory from being cleaned up _temporary_directory=folder, ), @@ -538,9 +655,30 @@ def from_file(cls, file: str) -> Self: :return: a `.Blob` object referencing the specified file. """ - return cls.model_construct( # type: ignore[return-value] - href="blob://local", - _data=BlobFile(file, media_type=cls.default_media_type()), + return cls( + BlobFile(file, media_type=cls.media_type), + ) + + @classmethod + def from_url(cls, href: str, client: httpx.Client | None = None) -> Self: + """Create a `.Blob` that references data at a URL. + + This is the recommended way to create a `.Blob` that references + data held remotely. It should ideally be called on a subclass + of `.Blob` that has set ``media_type``. + + :param href: the URL where the data may be downloaded. + :param client: if supplied, this `httpx.Client` will be used to + download the data. + + :return: a `.Blob` object referencing the specified URL. 
+ """ + return cls( + RemoteBlobData( + media_type=cls.media_type, + href=href, + client=client, + ), ) def response(self) -> Response: @@ -551,7 +689,13 @@ def response(self) -> Response: :return: an HTTP response that streams data from memory or file. """ - return self.data.response() + data = self.data + if isinstance(data, LocalBlobData): + return data.response() + else: + raise NotImplementedError( + "Currently, only local BlobData can be served over HTTP." + ) def blob_type(media_type: str) -> type[Blob]: @@ -565,10 +709,9 @@ def blob_type(media_type: str) -> type[Blob]: class MyImageBlob(Blob): media_type = "image/png" - :param media_type: will be the default value of the ``media_type`` property - on the `.Blob` subclass. + :param media_type: the media type that the new `.Blob` subclass will use. - :return: a subclass of `.Blob` with the specified default media type. + :return: a subclass of `.Blob` with the specified media type. :raise ValueError: if the media type contains ``'`` or ``\``. """ @@ -580,14 +723,12 @@ class MyImageBlob(Blob): ) if "'" in media_type or "\\" in media_type: raise ValueError("media_type must not contain single quotes or backslashes") - return create_model( + return type( f"{media_type.replace('/', '_')}_blob", - __base__=Blob, - media_type=(eval(f"Literal[r'{media_type}']"), media_type), # noqa: S307 - # This can't be done with `literal_eval` as that does not support subscripts. - # Basic sanitisation is done above by removing backslashes and single quotes, - # and using a raw string. However, the long term solution is to remove this - # function in favour of subclassing Blob, as recommended in the docs. + (Blob,), + { + "media_type": media_type, + }, ) @@ -600,7 +741,7 @@ class BlobDataManager: reference, and will be expired by the `.ActionManager`. 
Note that the `.BlobDataManager` does not work with `.Blob` objects directly, - it holds only the `.ServerSideBlobData` object, which is where the data is + it holds only the `.LocalBlobData` object, which is where the data is stored. This means you should not rely on any custom attributes of a `.Blob` subclass being preserved when the `.Blob` is passed from one action to another. @@ -609,26 +750,26 @@ class BlobDataManager: def __init__(self) -> None: """Initialise a BlobDataManager object.""" - self._blobs: WeakValueDictionary[uuid.UUID, ServerSideBlobData] = ( + self._blobs: WeakValueDictionary[uuid.UUID, LocalBlobData] = ( WeakValueDictionary() ) - def add_blob(self, blob: ServerSideBlobData) -> uuid.UUID: + def add_blob(self, blob: LocalBlobData) -> uuid.UUID: """Add a `.Blob` to the manager, generating a unique ID. - This function adds a `.ServerSideBlobData` object to the + This function adds a `.LocalBlobData` object to the `.BlobDataManager`. It will retain a weak reference to the - `.ServerSideBlobData` object: you are responsible for ensuring + `.LocalBlobData` object: you are responsible for ensuring the data is not garbage collected, for example by including the parent `.Blob` in the output of an action. - :param blob: a `.ServerSideBlobData` object that holds the data + :param blob: a `.LocalBlobData` object that holds the data being added. :return: a unique ID identifying the data. This forms part of the URL to download the data. - :raise ValueError: if the `.ServerSideBlobData` object already + :raise ValueError: if the `.LocalBlobData` object already has an ``id`` attribute but is not in the dictionary of data. This suggests the object has been added to another `.BlobDataManager`, which should never happen. 
@@ -642,13 +783,13 @@ def add_blob(self, blob: ServerSideBlobData) -> uuid.UUID: self._blobs[id] = blob return id - def get_blob(self, blob_id: uuid.UUID) -> ServerSideBlobData: + def get_blob(self, blob_id: uuid.UUID) -> LocalBlobData: """Retrieve a `.Blob` from the manager. :param blob_id: the unique ID assigned when the data was added to this `.BlobDataManager`. - :return: the `.ServerSideBlobData` object holding the data. + :return: the `.LocalBlobData` object holding the data. """ return self._blobs[blob_id] @@ -656,7 +797,7 @@ def download_blob(self, blob_id: uuid.UUID) -> Response: """Download a `.Blob`. This function returns a `fastapi.Response` allowing the data to be - downloaded, using the `.ServerSideBlobData.response` method. + downloaded, using the `.LocalBlobData.response` method. :param blob_id: the unique ID assigned when the data was added to this `.BlobDataManager`. @@ -671,7 +812,7 @@ def attach_to_app(self, app: FastAPI) -> None: """Attach the BlobDataManager to a FastAPI app. Add an endpoint to a FastAPI application that will serve the content of - the `.ServerSideBlobData` objects in response to ``GET`` requests. + the `.LocalBlobData` objects in response to ``GET`` requests. :param app: the `fastapi.FastAPI` application to which we are adding the endpoint. 
diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 3b04f67..02ffb09 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -7,6 +7,7 @@ from uuid import uuid4 from fastapi.testclient import TestClient +from pydantic import TypeAdapter from pydantic_core import PydanticSerializationError import pytest import labthings_fastapi as lt @@ -84,20 +85,6 @@ def client(): yield client -def test_blobdata_protocol(): - """Check the definition of the blobdata protocol, and the implementations.""" - - class BadBlob(lt.blob.BlobData): - pass - - bad_blob = BadBlob() - - assert not isinstance(bad_blob, lt.blob.ServerSideBlobData) - - with pytest.raises(NotImplementedError): - _ = bad_blob.content - - @pytest.mark.filterwarnings("ignore:.*removed in v0.1.0.*:DeprecationWarning") def test_blob_type(): """Check we can't put dodgy values into a blob output model""" @@ -108,7 +95,7 @@ def test_blob_type(): def test_blob_creation(): - """Check that blobs can be created in three ways""" + """Check that blobs can be created in four ways""" TEXT = b"Test input" # Create a blob from a file in a temporary directory td = TemporaryDirectory() @@ -129,9 +116,12 @@ def test_blob_creation(): with pytest.raises(IOError): _ = TextBlob.from_temporary_directory(td, "nonexistent") - # Finally, check we can make a blob from a bytes object, no file. + # Check we can make a blob from a bytes object, no file. 
blob = TextBlob.from_bytes(TEXT) assert blob.content == TEXT + # Check we can make a blob from a URL + blob = TextBlob.from_url(href="https://example.com/blob") + assert blob.to_blobmodel().href == "https://example.com/blob" def test_blob_serialisation(): @@ -139,20 +129,18 @@ def test_blob_serialisation(): blob = TextBlob.from_bytes(b"Some data") # Can't serialise a blob (with data) without a BlobDataManager with pytest.raises(PydanticSerializationError): - blob.model_dump() + TypeAdapter(TextBlob).dump_python(blob) # Fake the required context variable, and it should work with use_dummy_url_for(): - data = blob.model_dump() + data = TypeAdapter(TextBlob).dump_python(blob) assert data["href"].startswith("urlfor://download_blob/?blob_id=") assert data["media_type"] == "text/plain" - # Blobs that already refer to a remote URL should serialise without error - # though there's currently no way to create one on the server. - remoteblob = TextBlob.model_construct( - media_type="text/plain", + # Blobs that already refer to a remote URL should serialise without error. + remoteblob = TextBlob.from_url( href="https://example/", ) - data = remoteblob.model_dump() + data = TypeAdapter(TextBlob).dump_python(remoteblob) assert data["href"] == "https://example/" assert data["media_type"] == "text/plain" From 461ba1a42c63888efa4e8fcd8fde887e7c7d23b8 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:50 +0000 Subject: [PATCH 04/11] Improve Blob JSONSchema This now correctly tells clients the media type, and uses a descriptive title. I believe it's now at least as good as the old schema. 
--- src/labthings_fastapi/outputs/blob.py | 34 +++++++++++++++++++++++++++ tests/test_blob_output.py | 23 +++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 3f6c48f..0897a01 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -60,7 +60,9 @@ def get_image(self) -> MyImageBlob: from pydantic import ( BaseModel, GetCoreSchemaHandler, + GetJsonSchemaHandler, ) +from pydantic.json_schema import JsonSchemaValue from pydantic_core import core_schema from typing_extensions import Self from labthings_fastapi.middleware.url_for import url_for @@ -443,6 +445,13 @@ def __get_pydantic_core_schema__( This magic method allows `pydantic` to serialise `.Blob` instances, and generate a JSONSchema for them. + We tell `pydantic` to base its handling of `Blob` on the + `.BlobModel` schema, but to use our custom validator and + serialiser, defined later as class methods. + + We will tweak the generated JSONSchema in `__get_pydantic_json_schema__` + to include the media_type and description defaults. + The representation of a `.Blob` in JSON is described by `.BlobModel` and includes the ``href`` and ``media_type`` properties as well as a description. @@ -475,6 +484,31 @@ def __get_pydantic_core_schema__( ), ) + @classmethod + def __get_pydantic_json_schema__( + cls, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler + ) -> JsonSchemaValue: + """Customise the JSON Schema to include the media_type. + + :param core_schema: The core schema for the Blob type. + :param handler: The pydantic JSON schema handler. + :return: The JSON schema for the Blob type, with media_type included. 
+ """ + json_schema = handler(core_schema) + json_schema = handler.resolve_ref_schema(json_schema) + # Set the title to the class name, not BlobModel + json_schema["title"] = cls.__name__ + # Add the media_type default value from this class + json_schema["properties"]["media_type"]["default"] = cls.media_type + # If the media_type is specific, add a const constraint + # This shows that only this media_type is valid + if "*" not in cls.media_type: + json_schema["properties"]["media_type"]["const"] = [cls.media_type] + # Add the default description + if cls.description is not None: + json_schema["properties"]["description"]["default"] = cls.description + return json_schema + @classmethod def _validate(cls, value: Any, handler: Callable[[Any], BlobModel]) -> Self: r"""Validate and convert a value to a `.Blob` instance. diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 02ffb09..9be45ae 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -17,7 +17,11 @@ class TextBlob(lt.blob.Blob): - media_type: str = "text/plain" + media_type = "text/plain" + + +class VagueTextBlob(lt.blob.Blob): + media_type = "text/*" class ThingOne(lt.Thing): @@ -94,6 +98,23 @@ def test_blob_type(): assert M.from_bytes(b"").media_type == "text/plain" +def test_blob_schema(): + """Check that the Blob schema is as expected.""" + schema = TypeAdapter(TextBlob).json_schema() + assert schema["title"] == "TextBlob" + assert schema["type"] == "object" + assert "href" in schema["properties"] + assert "media_type" in schema["properties"] + assert schema["properties"]["media_type"]["default"] == "text/plain" + # Since media_type is specific, it should have a const constraint + assert schema["properties"]["media_type"]["const"] == ["text/plain"] + + # Check that a vague blob type has no const constraint + # This is because multiple media types are valid - it ends with * + schema = TypeAdapter(VagueTextBlob).json_schema() + assert "const" not in 
schema["properties"]["media_type"] + + def test_blob_creation(): """Check that blobs can be created in four ways""" TEXT = b"Test input" From bf6f48ec57b5c78310e5902723cbea496062e873 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:50 +0000 Subject: [PATCH 05/11] Actually check media types and eliminate ClientBlobData We can now use one Blob class for client and server :) I realised we had the potential to have inconsistencies between BlobData and the host Blob in the media type. We now check the types match, and allow the BlobData to override the Blob's default if it's a matching but more specific type. I've also taken a pass through the blob documentation to update it where needed. Happily, as this PR only touches implementation details, not much has changed. --- docs/source/blobs.rst | 10 +- src/labthings_fastapi/client/__init__.py | 26 ++-- src/labthings_fastapi/client/outputs.py | 77 ------------ src/labthings_fastapi/outputs/blob.py | 145 +++++++++++++++++------ tests/test_blob_output.py | 73 +++++++++++- 5 files changed, 191 insertions(+), 140 deletions(-) delete mode 100644 src/labthings_fastapi/client/outputs.py diff --git a/docs/source/blobs.rst b/docs/source/blobs.rst index 4bcdb8f..a03da48 100644 --- a/docs/source/blobs.rst +++ b/docs/source/blobs.rst @@ -7,14 +7,14 @@ Blob input/output If interactions require only simple data types that can easily be represented in JSON, very little thought needs to be given to data types - strings and numbers will be converted to and from JSON automatically, and your Python code should only ever see native Python datatypes whether it's running on the server or a remote client. However, if you want to transfer larger data objects such as images, large arrays or other binary data, you will need to use a `.Blob` object. -`.Blob` objects are not part of the Web of Things specification, which doesn't give much consideration to returning large or complicated datatypes.
In LabThings-FastAPI, the `.Blob` mechanism is intended to provide an efficient way to work with arbitrary binary data. If it's used to transfer data between two Things on the same server, the data should not be copied or otherwise iterated over - and when it must be transferred over the network it can be done using a binary transfer, rather than embedding in JSON with base64 encoding. +`.Blob` objects are not part of the Web of Things specification, which doesn't give much consideration to returning large or complicated datatypes. In LabThings-FastAPI, the `.Blob` mechanism is intended to provide an efficient way to work with arbitrary binary data. If a `.Blob` is passed between two Things on the same server, the data will not be copied - and when it must be transferred over the network it can be done using a binary transfer, rather than embedding in JSON with base64 encoding. -A `.Blob` consists of some data and a MIME type, which sets how the data should be interpreted. It is best to create a subclass of `.Blob` with the content type set: this makes it clear what kind of data is in the `.Blob`. In the future, it might be possible to add functionality to `.Blob` subclasses, for example to make it simple to obtain an image object from a `.Blob` containing JPEG data. However, this will not currently work across both client and server code. +A `.Blob` consists of some data and a MIME type, which sets how the data should be interpreted. It is best to create a subclass of `.Blob` with the ``media_type`` set: this makes it clear what kind of data is in the `.Blob`. In the future, it might be possible to add functionality to `.Blob` subclasses, for example to make it simple to obtain an image object from a `.Blob` containing JPEG data. However, this will not currently work across both client and server code. 
Creating and using `.Blob` objects ------------------------------------------------ -Blobs can be created from binary data that is in memory (a `bytes` object) with `.Blob.from_bytes`, on disk (with `.Blob.from_temporary_directory` or `.Blob.from_file`), or using a URL as a placeholder. The intention is that the code that uses a `.Blob` should not need to know which of these is the case, and should be able to use the same code regardless of how the data is stored. +Blobs can be created from binary data that is in memory (a `bytes` object) with `.Blob.from_bytes`, or on disk (with `.Blob.from_temporary_directory` or `.Blob.from_file`). A `.Blob` may also point to remote data (see `.Blob.from_url`). Code that uses a `.Blob` should not need to know how the data is stored, as the interface is the same in each case. Blobs offer three ways to access their data: @@ -122,7 +122,7 @@ On the client, we can use the `capture_image` action directly (as before), or we HTTP interface and serialization -------------------------------- -`.Blob` objects are subclasses of `pydantic.BaseModel`, which means they can be serialized to JSON and deserialized from JSON. When this happens, the `.Blob` is represented as a JSON object with `.Blob.url` and `.Blob.content_type` fields. The `.Blob.url` field is a link to the data. The `.Blob.content_type` field is a string representing the MIME type of the data. It is worth noting that models may be nested: this means an action may return many `.Blob` objects in its output, either as a list or as fields in a `pydantic.BaseModel` subclass. Each `.Blob` in the output will be serialized to JSON with its URL and content type, and the client can then download the data from the URL, one download per `.Blob` object. +`.Blob` objects can be serialized to JSON and deserialized from JSON. When this happens, the `.Blob` is represented as a JSON object with ``href`` and ``media_type`` fields. The ``href`` field is a link to the data.
The ``media_type`` field is a string representing the MIME type of the data. It is worth noting that models may be nested: this means an action may return many `.Blob` objects in its output, either as a list or as fields in a `pydantic.BaseModel` subclass. Each `.Blob` in the output will be serialized to JSON with its URL and media type, and the client can then download the data from the URL, one download per `.Blob` object. When a `.Blob` is serialized, the URL is generated with a unique ID to allow it to be downloaded. The URL is not guaranteed to be permanent, and should not be used as a long-term reference to the data. For `.Blob` objects that are part of the output of an action, the URL will expire after 5 minutes (or the retention time set for the action), and the data will no longer be available for download after that time. @@ -136,7 +136,7 @@ It may be possible to have actions return binary data directly in the future, bu .. note:: - Serialising or deserialising `.Blob` objects requires access to the `.BlobDataManager`\ . As there is no way to pass this in to the relevant methods at serialisation/deserialisation time, we use context variables to access them. This means that a `.blob_serialisation_context_manager` should be used to set (and then clear) those context variables. This is done by the `.BlobIOContextDep` dependency on the relevant endpoints (currently any endpoint that may return the output of an action). + Serialising or deserialising `.Blob` objects generates URLs, which are specific to the HTTP request. This means that `.Blob` objects cannot be serialised or deserialised outside the context of an HTTP request handler, so if code in an Action or Property attempts to turn a `.Blob` into JSON, it is likely to raise exceptions. For more detail on this mechanism, see `.middleware.url_for`\ .
Memory management and retention diff --git a/src/labthings_fastapi/client/__init__.py b/src/labthings_fastapi/client/__init__.py index 1b45d4c..8fbc202 100644 --- a/src/labthings_fastapi/client/__init__.py +++ b/src/labthings_fastapi/client/__init__.py @@ -13,9 +13,9 @@ import httpx from urllib.parse import urlparse, urljoin -from pydantic import BaseModel +from pydantic import BaseModel, TypeAdapter -from .outputs import ClientBlobOutput +from ..outputs.blob import Blob, RemoteBlobData from ..exceptions import ( FailedToInvokeActionError, ServerActionError, @@ -206,16 +206,14 @@ def invoke_action(self, path: str, **kwargs: Any) -> Any: """ for k in kwargs.keys(): value = kwargs[k] - if isinstance(value, ClientBlobOutput): - # ClientBlobOutput objects may be used as input to a subsequent - # action. When this is done, they should be serialised to a dict - # with `href` and `media_type` keys, as done below. - # Ideally this should be replaced with `Blob` and the use of - # `pydantic` models to serialise action inputs. + if isinstance(value, Blob): + # Blob objects may be used as input to a subsequent + # action. When this is done, they should be serialised by + # pydantic, to a dictionary that includes href and media_type. # # Note that the blob will not be uploaded: we rely on the blob # still existing on the server. 
- kwargs[k] = {"href": value.href, "media_type": value.media_type} + kwargs[k] = TypeAdapter(Blob).dump_python(value) response = self.client.post(urljoin(self.path, path), json=kwargs) if response.is_error: message = _construct_failed_to_invoke_message(path, response) @@ -228,10 +226,12 @@ def invoke_action(self, path: str, **kwargs: Any) -> Any: and "href" in invocation["output"] and "media_type" in invocation["output"] ): - return ClientBlobOutput( - media_type=invocation["output"]["media_type"], - href=invocation["output"]["href"], - client=self.client, + return Blob( + RemoteBlobData( + media_type=invocation["output"]["media_type"], + href=invocation["output"]["href"], + client=self.client, + ) ) return invocation["output"] message = _construct_invocation_error_message(invocation) diff --git a/src/labthings_fastapi/client/outputs.py b/src/labthings_fastapi/client/outputs.py deleted file mode 100644 index 09c4962..0000000 --- a/src/labthings_fastapi/client/outputs.py +++ /dev/null @@ -1,77 +0,0 @@ -"""A client-side implementation of `.Blob`. - -.. note:: - - In the future, both client and server code are planned to use `.Blob` to - represent binary data, or data held in a file. - -When a `.ThingClient` returns data to a client that matches the schema of a `.Blob` -(specifically, it needs an `href` and a `media_type`), we convert it into a -`.ClientBlobOutput` object. This is a work-a-like for `.Blob`, meaning it can -be saved to a file or have its contents accessed in the same ways. -""" - -import io -from typing import Optional -import httpx - - -class ClientBlobOutput: - """An output from LabThings best returned as a file. - - This object is returned by a client when the output is not serialised to JSON. - It may be either retrieved to memory using `.ClientBlobOutput.content`, or - saved to a file using `.ClientBlobOutput.save`. - - .. note:: - - In the future, it is planned to replace this with `.Blob` as used on - server-side code. 
The ``.content`` and ``.save()`` methods should be - identical between the two. - """ - - media_type: str - download_url: str - - def __init__( - self, media_type: str, href: str, client: Optional[httpx.Client] = None - ) -> None: - """Create a ClientBlobOutput to wrap a link to a downloadable file. - - :param media_type: the MIME type of the remote file. - :param href: the URL where it may be downloaded. - :param client: if supplied, this `httpx.Client` will be used to - download the data. - """ - self.media_type = media_type - self.href = href - self.client = client or httpx.Client() - - @property - def content(self) -> bytes: - """The binary data, as a `bytes` object.""" - return self.client.get(self.href).content - - def save(self, filepath: str) -> None: - """Save the output to a file. - - This may remove the need to hold the output in memory, though for now it - simply retrieves the output into memory, then writes it to a file. - - :param filepath: the file will be saved at this location. - """ - with open(filepath, "wb") as f: - f.write(self.content) - - def open(self) -> io.IOBase: - """Open the output as a binary file-like object. - - Internally, this will download the file to memory, and wrap the - resulting `bytes` object in an `io.BytesIO` object to allow it to - function as a file-like object. - - To work with the data on disk, use `.ClientBlobOutput.save` instead. - - :return: a file-like object containing the downloaded data. - """ - return io.BytesIO(self.content) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 0897a01..861f3d9 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -102,6 +102,9 @@ def get_href(self) -> str: The implementation of this method for local blobs will need `.url_for.url_for` and thus it should only be called in a response handler when the `.middeware.url_for` middleware is enabled. + + :return: the URL as a string. 
+ :raises NotImplementedError: always, as this must be implemented by subclasses. """ raise NotImplementedError("get_href must be implemented.") @@ -117,6 +120,7 @@ def save(self, filename: str) -> None: """Save the data to a file. :param filename: the path where the file should be saved. + :raises NotImplementedError: always, as this must be implemented by subclasses. """ raise NotImplementedError("save must be implemented.") @@ -124,6 +128,7 @@ def open(self) -> io.IOBase: """Return a file-like object that may be read from. :return: an open file-like object. + :raises NotImplementedError: always, as this must be implemented by subclasses. """ raise NotImplementedError("open must be implemented.") @@ -158,7 +163,10 @@ def __init__( self._client = client or httpx.Client() def get_href(self) -> str: - """Return the URL to download the data.""" + """Return the URL to download the data. + + :return: the URL as a string. + """ return self._href @property @@ -225,6 +233,8 @@ def get_href(self) -> str: Note that this should only be called in a response handler, as it relies on `.url_for.url_for`\ . + + :return: the URL as a string. """ return str(url_for("download_blob", blob_id=self.id)) @@ -232,6 +242,7 @@ def response(self) -> Response: """Return a`fastapi.Response` object that sends binary data. :return: a response that streams the data from disk or memory. + :raises NotImplementedError: always, as this must be implemented by subclasses. """ raise NotImplementedError @@ -397,6 +408,54 @@ class BlobModel(BaseModel): """This description is added to the serialised `.Blob`.""" +def parse_media_type(media_type: str) -> tuple[str | None, str | None]: + """Parse a media type string into its type and subtype. + + :param media_type: the media type string to parse. + + :return: a tuple of (type, subtype) where each is a string or None. + :raises ValueError: if the media type is invalid. 
+ """ + if not media_type: + return None, None + # Ignore leading whitespace and parameters (after a ;) + media_type = media_type.strip().split(";")[0] + # We expect a type and subtype separated with a / + parts = media_type.split("/") + if len(parts) != 2: + raise ValueError( + f"Invalid media type: {media_type} must contain exactly one '/'." + ) + for i in range(2): + part = parts[i].strip() + if part == "*": + parts[i] = None + if not parts[0] and parts[1]: + raise ValueError( + f"Invalid media type: {media_type} has no type but has a subtype." + ) + return parts[0], parts[1] + + +def match_media_types(media_type: str, pattern: str) -> bool: + """Check if a media type matches a pattern. + + The pattern may include wildcards, e.g. ``image/*`` or ``*/*``. + + :param media_type: the media type to check. + :param pattern: the pattern to match against. + + :return: True if the media type matches the pattern, False otherwise. + """ + type_a, subtype_a = parse_media_type(media_type) + type_b, subtype_b = parse_media_type(pattern) + if type_b is not None and type_a != type_b: + return False + if subtype_b is not None and subtype_a != subtype_b: + return False + return True + + class Blob: r"""A container for binary data that may be retrieved over HTTP. @@ -404,18 +463,21 @@ class Blob: A `.Blob` may be created to hold data using the class methods `.Blob.from_bytes`, `.Blob.from_file` or `.Blob.from_temporary_directory`\ . - It may also reference remote data, using `.Blob.from_url`\ . + It may also reference remote data, using `.Blob.from_url`\ , though this + is currently only used on the client side. The constructor requires a `.BlobData` instance, so the methods mentioned - previously are likely more convenient. + previously are likely a more convenient way to instantiate a `.Blob`\ . You are strongly advised to use a subclass of this class that specifies the `.Blob.media_type` attribute, as this will propagate to the auto-generated - documentation. 
+ documentation and make the return type of your action clearer. This class is `pydantic` compatible, in that it provides a schema, validator and serialiser. However, it may use `.url_for.url_for` during serialisation, so it should only be serialised in a request handler function. This functionality is intended for use by LabThings library functions only. + Validation and serialisation behaviour is described in the docstrings of + `.Blob._validate` and `.Blob._serialize`. """ media_type: str = "*/*" @@ -430,11 +492,22 @@ def __init__(self, data: BlobData, description: str | None = None) -> None: :param data: the `.BlobData` object that stores the data. :param description: an optional description of the blob. + + :raise ValueError: if the media_type of the data does not match + the media_type of the `.Blob` subclass. """ super().__init__() self._data = data if description is not None: self.description = description + if not match_media_types(data.media_type, self.media_type): + raise ValueError( + f"Blob data media_type '{data.media_type}' does not match " + f"Blob media_type '{self.media_type}'." + ) + # The data may have a more specific media_type, so we use that + # in preference to the default defined by the class. + self.media_type = data.media_type @classmethod def __get_pydantic_core_schema__( @@ -446,28 +519,13 @@ def __get_pydantic_core_schema__( instances, and generate a JSONSchema for them. We tell `pydantic` to base its handling of `Blob` on the - `.BlobModel` schema, but to use our custom validator and - serialiser, defined later as class methods. + `.BlobModel` schema, with custom validation and serialisation. + Validation and serialisation behaviour is described in the docstrings + of `.Blob._validate` and `.Blob._serialize`. - We will tweak the generated JSONSchema in `__get_pydantic_json_schema__` - to include the media_type and description defaults. 
- - The representation of a `.Blob` in JSON is described by - `.BlobModel` and includes the ``href`` and ``media_type`` properties - as well as a description. - - When a `.Blob` is serialised, we will generate a download URL that - matches the request to which we are responding. This means we may - only serialise a `.Blob` in the context of a request handler, and - it's required that the `.middleware.url_for` middleware is in use. - - When a `.Blob` is validated, we will check to see if the URL given - as its ``href`` looks like a `.Blob` download URL on this server. If - it does, the returned object will hold a reference to the local data. - If we can't match the URL to a `.Blob` on this server, we will raise - an error. Handling of `.Blob` input is currently experimental, and - limited to passing the output of one Action as input to a subsequent - one. + The JSONSchema is generated for `.BlobModel` but is then refined + in `__get_pydantic_json_schema__` to include the ``media_type`` + and ``description`` defaults. :param source: The source type being converted. :param handler: The pydantic core schema handler. @@ -513,20 +571,25 @@ def __get_pydantic_json_schema__( def _validate(cls, value: Any, handler: Callable[[Any], BlobModel]) -> Self: r"""Validate and convert a value to a `.Blob` instance. - :param value: The value to validate. - :param handler: The handler to convert the value if needed. + :param value: The input value, as passed in or loaded from JSON. + :param handler: A function that runs the validation logic of BlobModel. + + If the value is already a `.Blob`, it will be returned directly. + Otherwise, we first validate the input using the `.BlobModel` schema. + + When a `.Blob` is validated, we check to see if the URL given + as its ``href`` looks like a `.Blob` download URL on this server. If + it does, the returned object will hold a reference to the local data. 
- When a `.Blob` is created from a dictionary, LabThings - will attempt to deserialise it by retrieving the data from the URL - specified in `.Blob.href`. Currently, this must be a URL pointing to a - `.Blob` that already exists on this server, and any other URL will - cause a `LookupError`. + If we can't match the URL to a `.Blob` on this server, we will raise + an error. Handling of `.Blob` input is currently experimental, and + limited to passing the output of one Action as input to a subsequent + one. - :return: the `.Blob` object (i.e. ``self``), after retrieving the data. + :return: a `.Blob` object pointing to the data. - :raise ValueError: if the ``href`` is set as ``"blob://local"`` but - the ``_data`` attribute has not been set. This happens when the - `.Blob` is being constructed using `.Blob.from_bytes` or similar. + :raise ValueError: if the ``href`` does not contain a valid Blob ID, or + if the Blob ID is not found on this server. """ # If the value is already a Blob, return it directly if isinstance(value, cls): @@ -549,7 +612,12 @@ def _serialize( ) -> Mapping[str, str]: """Serialise the Blob to a dictionary. + See `.Blob.to_blobmodel` for a description of how we serialise. + :param obj: the `.Blob` instance to serialise. + :param handler: the handler (provided by pydantic) takes a BlobModel + and converts it to a dictionary. The handler runs the serialiser of + the core schema we've wrapped, in this case the BlobModel serialiser. :return: a JSON-serialisable dictionary with a URL that allows the `.Blob` to be downloaded from the `.BlobManager`. """ @@ -566,9 +634,6 @@ def to_blobmodel(self) -> BlobModel: :return: a JSON-serialisable dictionary with a URL that allows the `.Blob` to be downloaded from the `.BlobManager`. - :raises TypeError: if the blob data ID is missing. This should - never happen, and if it does it's probably a bug in the - `.BlobData` class. 
""" data = { "href": self.data.get_href(), @@ -722,6 +787,8 @@ def response(self) -> Response: that returns the data over HTTP. :return: an HTTP response that streams data from memory or file. + :raise NotImplementedError: if the data is not local. It's not currently + possible to serve remote data via the `.BlobManager`. """ data = self.data if isinstance(data, LocalBlobData): diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 9be45ae..0394d86 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -11,7 +11,6 @@ from pydantic_core import PydanticSerializationError import pytest import labthings_fastapi as lt -from labthings_fastapi.client.outputs import ClientBlobOutput from labthings_fastapi.exceptions import FailedToInvokeActionError from labthings_fastapi.testing import create_thing_without_server, use_dummy_url_for @@ -98,6 +97,45 @@ def test_blob_type(): assert M.from_bytes(b"").media_type == "text/plain" +@pytest.mark.parametrize( + ("media_type", "expected"), + [ + ("text/plain", ("text", "plain")), + ("text/plain; charset=utf-8", ("text", "plain")), + ("text/*", ("text", None)), + ("*/*", (None, None)), + ], +) +def test_media_type_parsing(media_type, expected): + """Check that media type parsing works as expected.""" + assert lt.blob.parse_media_type(media_type) == expected + + +@pytest.mark.parametrize("media_type", ["too/many/slashes", "noslash", "*/plain"]) +def test_invalid_media_type_parsing(media_type): + """Check that invalid media types raise an error.""" + with pytest.raises(ValueError): + lt.blob.parse_media_type(media_type) + + +@pytest.mark.parametrize( + ("data_media_type", "blob_media_type", "expected"), + [ + ("text/plain", "text/plain", True), + ("text/html", "text/*", True), + ("image/png", "image/*", True), + ("application/json", "*/*", True), + ("text/plain", "text/html", False), + ("image/jpeg", "image/png", False), + ("application/xml", "application/json", False), + ("text/plain", "image/*", 
False), + ], +) +def test_media_type_matching(data_media_type, blob_media_type, expected): + """Check that media type matching works as expected.""" + assert lt.blob.match_media_types(data_media_type, blob_media_type) is expected + + def test_blob_schema(): """Check that the Blob schema is as expected.""" schema = TypeAdapter(TextBlob).json_schema() @@ -115,6 +153,26 @@ def test_blob_schema(): assert "const" not in schema["properties"]["media_type"] +def test_blob_initialisation(): + """Check that blobs can be initialised correctly.""" + data = lt.blob.BlobBytes(b"Test data", media_type="text/plain") + blob = TextBlob(data, description="A test blob") + assert blob.content == b"Test data" + assert blob.media_type == "text/plain" + assert blob.description == "A test blob" + + # Check that the media type is refined if the data is more + # specific than the Blob class + vague_blob = VagueTextBlob(data) + assert vague_blob.content == b"Test data" + assert vague_blob.media_type == "text/plain" + + # Check we get an error if the media type doesn't match + data_bad = lt.blob.BlobBytes(b"Bad data", media_type="image/png") + with pytest.raises(ValueError): + _ = TextBlob(data_bad) + + def test_blob_creation(): """Check that blobs can be created in four ways""" TEXT = b"Test input" @@ -224,16 +282,19 @@ def test_blob_input(client): # Check that a bad URL results in an error. # This URL is not totally bad - it follows the right form, but the # UUID is not found on the server. 
- bad_blob = ClientBlobOutput( - media_type="text/plain", href=f"http://nonexistent.local/blob/{uuid4()}" + bad_blob = lt.blob.Blob( + lt.blob.RemoteBlobData( + media_type="text/plain", href=f"http://nonexistent.local/blob/{uuid4()}" + ) ) with pytest.raises(FailedToInvokeActionError, match="wasn't found"): tc.passthrough_blob(blob=bad_blob) # Try again with a totally bogus URL - bad_blob = ClientBlobOutput( - media_type="text/plain", href="http://nonexistent.local/totally_bogus" + bad_blob = lt.blob.Blob( + lt.blob.RemoteBlobData( + media_type="text/plain", href="http://nonexistent.local/totally_bogus" + ) ) - msg = "must contain a Blob ID" with pytest.raises(FailedToInvokeActionError, match=msg): tc.passthrough_blob(blob=bad_blob) From e9d39a25a8a0e289fda8253f9ab0be0998ad034c Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:50 +0000 Subject: [PATCH 06/11] Use URLFor instead of guessing URL for action invocations. --- src/labthings_fastapi/actions.py | 23 +++++++++---------- src/labthings_fastapi/invocations.py | 4 +++- src/labthings_fastapi/middleware/url_for.py | 2 +- .../thing_description/_model.py | 8 ++++--- tests/test_blob_output.py | 1 - 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/labthings_fastapi/actions.py b/src/labthings_fastapi/actions.py index df3b449..ea31462 100644 --- a/src/labthings_fastapi/actions.py +++ b/src/labthings_fastapi/actions.py @@ -39,6 +39,8 @@ from fastapi import FastAPI, HTTPException, Request, Body, BackgroundTasks from pydantic import BaseModel, create_model +from labthings_fastapi.middleware.url_for import URLFor + from .base_descriptor import BaseDescriptor from .logs import add_thing_log_destination from .utilities import model_to_dict, wrap_plain_types_in_rootmodel @@ -207,7 +209,7 @@ def cancel(self) -> None: """ self.cancel_hook.set() - def response(self, request: Optional[Request] = None) -> InvocationModel: + def response(self) -> InvocationModel: """Generate a 
representation of the invocation suitable for HTTP. When an invocation is polled, we return a JSON object that includes @@ -219,13 +221,11 @@ def response(self, request: Optional[Request] = None) -> InvocationModel: :return: an `.InvocationModel` representing this `.Invocation`. """ - if request: - href = str(request.url_for("action_invocation", id=self.id)) - else: - href = f"{ACTION_INVOCATIONS_PATH}/{self.id}" links = [ - LinkElement(rel="self", href=href), - LinkElement(rel="output", href=href + "/output"), + LinkElement(rel="self", href=URLFor("action_invocation", id=self.id)), + LinkElement( + rel="output", href=URLFor("action_invocation_output", id=self.id) + ), ] # The line below confuses MyPy because self.action **evaluates to** a Descriptor # object (i.e. we don't call __get__ on the descriptor). @@ -233,7 +233,7 @@ def response(self, request: Optional[Request] = None) -> InvocationModel: status=self.status, id=self.id, action=self.thing.path + self.action.name, # type: ignore[call-overload] - href=href, + href=URLFor("action_invocation", id=self.id), timeStarted=self._start_time, timeCompleted=self._end_time, timeRequested=self._request_time, @@ -424,7 +424,7 @@ def list_invocations( :return: A list of invocations, optionally filtered by Thing and/or Action. """ return [ - i.response(request=request) + i.response() for i in self.invocations if thing is None or i.thing == thing if action is None or i.action == action # type: ignore[call-overload] @@ -473,7 +473,7 @@ def action_invocation(id: uuid.UUID, request: Request) -> InvocationModel: """ try: with self._invocations_lock: - return self._invocations[id].response(request=request) + return self._invocations[id].response() except KeyError as e: raise HTTPException( status_code=404, @@ -778,7 +778,6 @@ def add_to_fastapi(self, app: FastAPI, thing: Thing) -> None: # The solution below is to manually add the annotation, before passing # the function to the decorator. 
def start_action( - request: Request, body: Any, # This annotation will be overwritten below. id: NonWarningInvocationID, background_tasks: BackgroundTasks, @@ -793,7 +792,7 @@ def start_action( id=id, ) background_tasks.add_task(action_manager.expire_invocations) - return action.response(request=request) + return action.response() if issubclass(self.input_model, EmptyInput): annotation = Body(default_factory=StrictEmptyInput) diff --git a/src/labthings_fastapi/invocations.py b/src/labthings_fastapi/invocations.py index 7ed10e9..21f156e 100644 --- a/src/labthings_fastapi/invocations.py +++ b/src/labthings_fastapi/invocations.py @@ -13,6 +13,8 @@ from pydantic import BaseModel, ConfigDict, model_validator +from labthings_fastapi.middleware.url_for import URLFor + from .thing_description._model import Links @@ -91,7 +93,7 @@ class GenericInvocationModel(BaseModel, Generic[InputT, OutputT]): status: InvocationStatus id: uuid.UUID action: str - href: str + href: URLFor timeStarted: Optional[datetime] timeRequested: Optional[datetime] timeCompleted: Optional[datetime] diff --git a/src/labthings_fastapi/middleware/url_for.py b/src/labthings_fastapi/middleware/url_for.py index 09ab2c7..e519a11 100644 --- a/src/labthings_fastapi/middleware/url_for.py +++ b/src/labthings_fastapi/middleware/url_for.py @@ -204,4 +204,4 @@ def _validate(cls, value: Any, handler: Callable[[Any], Self]) -> Self: if isinstance(value, cls): return value else: - raise TypeError("URLFor instances may not be created from strings.") + raise ValueError("URLFor instances may not be created from strings.") diff --git a/src/labthings_fastapi/thing_description/_model.py b/src/labthings_fastapi/thing_description/_model.py index 83fae77..b4ee214 100644 --- a/src/labthings_fastapi/thing_description/_model.py +++ b/src/labthings_fastapi/thing_description/_model.py @@ -39,6 +39,8 @@ ) from pydantic import AnyUrl, BaseModel, Field, ConfigDict, AfterValidator +from labthings_fastapi.middleware.url_for import 
URLFor + class Version(BaseModel): """Version info for a Thing. @@ -240,7 +242,7 @@ class Form(BaseModel, Generic[OpT]): model_config = ConfigDict(extra="allow") - href: AnyUri + href: URLFor | AnyUri op: Optional[Union[OpT, List[OpT]]] = None contentType: Optional[str] = None contentCoding: Optional[str] = None @@ -296,7 +298,7 @@ class LinkElement(BaseModel): model_config = ConfigDict(extra="allow") - href: AnyUri + href: URLFor | AnyUri type: Optional[str] = None rel: Optional[str] = None anchor: Optional[AnyUri] = None @@ -439,7 +441,7 @@ class WotTdSchema16October2019(BaseModel): version: Optional[Version] = None links: Links = None forms: Optional[List[Form[RootOp]]] = Field(None, min_length=1) - base: Optional[AnyUri] = None + base: Optional[URLFor | AnyUri] = None securityDefinitions: Dict[str, SecurityScheme] support: Optional[AnyUri] = None created: Optional[datetime] = None diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 0394d86..2ecfc0f 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -245,7 +245,6 @@ def test_blob_output_inserver(client): def check_blob(output, expected_content: bytes): """Test that a BlobOutput can be retrieved in three ways""" - print(f"Testing blob output {output} which has attributes {output.__dict__}") assert output.content == expected_content with TemporaryDirectory() as dir: output.save(os.path.join(dir, "test_output")) From bd04379bec04774e3e19a1e3b6c3309c6c25deaf Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:03:50 +0000 Subject: [PATCH 07/11] Slightly neater media type comparison and fixed tests. I got rid of the conversion of "*" to None, I think it's clearer this way. I also fixed a typo and ignored a codespell false positive. 
--- src/labthings_fastapi/outputs/blob.py | 22 ++++++++++++---------- tests/test_blob_output.py | 4 ++-- tests/test_middleware_url_for.py | 9 +++++---- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 861f3d9..0266aa6 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -426,15 +426,17 @@ def parse_media_type(media_type: str) -> tuple[str | None, str | None]: raise ValueError( f"Invalid media type: {media_type} must contain exactly one '/'." ) - for i in range(2): - part = parts[i].strip() - if part == "*": - parts[i] = None - if not parts[0] and parts[1]: + main_type = parts[0].strip() + sub_type = parts[1].strip() + if len(main_type) == 0 or len(sub_type) == 0: + raise ValueError( + f"Invalid media type: {media_type} must have both type and subtype." + ) + if main_type == "*" and sub_type != "*": raise ValueError( f"Invalid media type: {media_type} has no type but has a subtype." ) - return parts[0], parts[1] + return main_type, sub_type def match_media_types(media_type: str, pattern: str) -> bool: @@ -449,9 +451,9 @@ def match_media_types(media_type: str, pattern: str) -> bool: """ type_a, subtype_a = parse_media_type(media_type) type_b, subtype_b = parse_media_type(pattern) - if type_b is not None and type_a != type_b: + if type_b != "*" and type_a != type_b: return False - if subtype_b is not None and subtype_a != subtype_b: + if subtype_b != "*" and subtype_a != subtype_b: return False return True @@ -539,7 +541,7 @@ def __get_pydantic_core_schema__( is_field_serializer=False, info_arg=False, when_used="always", - ), + ), # codespell:ignore ser ) @classmethod @@ -627,7 +629,7 @@ def to_blobmodel(self) -> BlobModel: r"""Represent the `.Blob` as a `.BlobModel` to get ready to serialise. When `pydantic` serialises this object, we first generate a `.BlobModel` - witht just the information to be serialised. 
+ with just the information to be serialised. We use `.from_url.from_url` to generate the URL, so this will error if it is serialised anywhere other than a request handler with the middleware from `.middleware.url_for` enabled. diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 2ecfc0f..834ea01 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -102,8 +102,8 @@ def test_blob_type(): [ ("text/plain", ("text", "plain")), ("text/plain; charset=utf-8", ("text", "plain")), - ("text/*", ("text", None)), - ("*/*", (None, None)), + ("text/*", ("text", "*")), + ("*/*", ("*", "*")), ], ) def test_media_type_parsing(media_type, expected): diff --git a/tests/test_middleware_url_for.py b/tests/test_middleware_url_for.py index e8f0dc8..09ef9d2 100644 --- a/tests/test_middleware_url_for.py +++ b/tests/test_middleware_url_for.py @@ -2,7 +2,7 @@ import threading import pytest -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from pydantic_core import PydanticSerializationError from fastapi import FastAPI from starlette.testclient import TestClient @@ -64,11 +64,12 @@ def test_validation(): assert m.url is u # Trying to initialise with anything else should raise an error - with pytest.raises(TypeError): + msg = "URLFor instances may not be created from strings" + with pytest.raises(ValidationError, match=msg): _ = ModelWithURL(url="https://example.com") - with pytest.raises(TypeError): + with pytest.raises(ValidationError): _ = ModelWithURL(url="endpoint_name") - with pytest.raises(TypeError): + with pytest.raises(ValidationError): _ = ModelWithURL(url=None) From 99e76ca30331626cc63e1b19b54c3397596340c7 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Mon, 12 Jan 2026 17:12:24 +0000 Subject: [PATCH 08/11] Ignore `ser` globally in codespell The `codespell:ignore` directives made lines too long, and I'm happy that "ser" is an abbreviation we are stuck with. 
--- dev-requirements.txt | 1 + pyproject.toml | 4 ++++ src/labthings_fastapi/middleware/url_for.py | 4 +--- src/labthings_fastapi/outputs/blob.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 7aa2665..95175f8 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -321,6 +321,7 @@ tabulate==0.9.0 # via sphinx-toolbox tomli==2.2.1 # via + # labthings-fastapi (pyproject.toml) # coverage # flake8-pyproject # mypy diff --git a/pyproject.toml b/pyproject.toml index 72e72ad..4179767 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dev = [ "sphinx>=7.2", "sphinx-autoapi", "sphinx-toolbox", + "tomli; python_version < '3.11'", "codespell", ] @@ -171,5 +172,8 @@ check-return-types = false check-class-attributes = false # prefer docstrings on the attributes check-yield-types = false # use type annotations instead +[tool.codespell] +ignore-words-list = ["ser"] + [project.scripts] labthings-server = "labthings_fastapi.server.cli:serve_from_cli" diff --git a/src/labthings_fastapi/middleware/url_for.py b/src/labthings_fastapi/middleware/url_for.py index e519a11..c85bbce 100644 --- a/src/labthings_fastapi/middleware/url_for.py +++ b/src/labthings_fastapi/middleware/url_for.py @@ -187,9 +187,7 @@ def __get_pydantic_core_schema__( return core_schema.no_info_wrap_validator_function( cls._validate, AnyUrl.__get_pydantic_core_schema__(AnyUrl, handler), - serialization=core_schema.to_string_ser_schema( # codespell:ignore ser - when_used="always" - ), + serialization=core_schema.to_string_ser_schema(when_used="always"), ) @classmethod diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 0266aa6..3ad48c2 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -541,7 +541,7 @@ def __get_pydantic_core_schema__( is_field_serializer=False, info_arg=False, when_used="always", - ), # codespell:ignore ser + ), ) 
@classmethod From 9d4751749186eb59b4aa9bfcfc55516d2ae80593 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Wed, 14 Jan 2026 09:50:07 +0000 Subject: [PATCH 09/11] Docstring fixes --- src/labthings_fastapi/actions.py | 3 --- src/labthings_fastapi/middleware/url_for.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/labthings_fastapi/actions.py b/src/labthings_fastapi/actions.py index ea31462..ff8091d 100644 --- a/src/labthings_fastapi/actions.py +++ b/src/labthings_fastapi/actions.py @@ -216,9 +216,6 @@ def response(self) -> InvocationModel: its status, any log entries, a return value (if completed), and a link to poll for updates. - :param request: is used to generate the ``href`` in the response, which - should retrieve an updated version of this response. - :return: an `.InvocationModel` representing this `.Invocation`. """ links = [ diff --git a/src/labthings_fastapi/middleware/url_for.py b/src/labthings_fastapi/middleware/url_for.py index c85bbce..f34f453 100644 --- a/src/labthings_fastapi/middleware/url_for.py +++ b/src/labthings_fastapi/middleware/url_for.py @@ -197,7 +197,7 @@ def _validate(cls, value: Any, handler: Callable[[Any], Self]) -> Self: :param value: The value to validate. :param handler: The handler to convert the value if needed. :return: The validated URLFor instance. - :raises TypeError: if the value is not a URLFor instance. + :raises ValueError: if the value is not a URLFor instance. """ if isinstance(value, cls): return value From 503a124964b0d00409434d0132409d4ef94f146d Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Wed, 14 Jan 2026 15:41:36 +0000 Subject: [PATCH 10/11] Fix parse_media_type and improve testing This gets full coverage of `blob.py` and checks a few things that weren't tested directly. 
I actually don't quite understand why coverage thought we weren't downloading the data as I'm fairly sure that was done already - but it's no bad thing to have an explicit test that doesn't go via the ThingServer. --- src/labthings_fastapi/outputs/blob.py | 4 +- tests/test_blob_output.py | 97 +++++++++++++++++++++++++-- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index 3ad48c2..c53bf8f 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -408,7 +408,7 @@ class BlobModel(BaseModel): """This description is added to the serialised `.Blob`.""" -def parse_media_type(media_type: str) -> tuple[str | None, str | None]: +def parse_media_type(media_type: str) -> tuple[str, str]: """Parse a media type string into its type and subtype. :param media_type: the media type string to parse. @@ -416,8 +416,6 @@ def parse_media_type(media_type: str) -> tuple[str | None, str | None]: :return: a tuple of (type, subtype) where each is a string or None. :raises ValueError: if the media type is invalid. """ - if not media_type: - return None, None # Ignore leading whitespace and parameters (after a ;) media_type = media_type.strip().split(";")[0] # We expect a type and subtype separated with a / diff --git a/tests/test_blob_output.py b/tests/test_blob_output.py index 834ea01..0633776 100644 --- a/tests/test_blob_output.py +++ b/tests/test_blob_output.py @@ -6,6 +6,7 @@ from tempfile import TemporaryDirectory from uuid import uuid4 +import fastapi from fastapi.testclient import TestClient from pydantic import TypeAdapter from pydantic_core import PydanticSerializationError @@ -21,6 +22,7 @@ class TextBlob(lt.blob.Blob): class VagueTextBlob(lt.blob.Blob): media_type = "text/*" + description = "This URL will download some vague text data." 
class ThingOne(lt.Thing): @@ -111,10 +113,17 @@ def test_media_type_parsing(media_type, expected): assert lt.blob.parse_media_type(media_type) == expected -@pytest.mark.parametrize("media_type", ["too/many/slashes", "noslash", "*/plain"]) -def test_invalid_media_type_parsing(media_type): +@pytest.mark.parametrize( + ("media_type", "msg"), + [ + ("too/many/slashes", "exactly one '/'"), + ("/leadingslash", "both type and subtype"), + ("*/plain", "has no type"), + ], +) +def test_invalid_media_type_parsing(media_type, msg): """Check that invalid media types raise an error.""" - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): lt.blob.parse_media_type(media_type) @@ -136,6 +145,29 @@ def test_media_type_matching(data_media_type, blob_media_type, expected): assert lt.blob.match_media_types(data_media_type, blob_media_type) is expected +def test_blobdata_base_class(): + """Check that BlobData/LocalBlobData abstract methods raise the right error.""" + bd = lt.blob.BlobData("*/*") + with pytest.raises(NotImplementedError): + _ = bd.content + with pytest.raises(NotImplementedError): + _ = bd.open() + with pytest.raises(NotImplementedError): + bd.save("somefile") + with pytest.raises(NotImplementedError): + _ = bd.get_href() + + lbd = lt.blob.LocalBlobData(media_type="text/plain") + with pytest.raises(NotImplementedError): + _ = lbd.content + with pytest.raises(NotImplementedError): + _ = lbd.open() + with pytest.raises(NotImplementedError): + lbd.save("somefile") + with pytest.raises(NotImplementedError): + _ = lbd.response() + + def test_blob_schema(): """Check that the Blob schema is as expected.""" schema = TypeAdapter(TextBlob).json_schema() @@ -151,6 +183,9 @@ def test_blob_schema(): # This is because multiple media types are valid - it ends with * schema = TypeAdapter(VagueTextBlob).json_schema() assert "const" not in schema["properties"]["media_type"] + assert schema["properties"]["description"]["default"] == ( + "This URL will download 
some vague text data." + ) def test_blob_initialisation(): @@ -203,10 +238,26 @@ def test_blob_creation(): assert blob.to_blobmodel().href == "https://example.com/blob" +def test_blob_data_manager(): + """Check blobs appear in the data manager.""" + blob = TextBlob.from_bytes(b"Some Data") + data = blob.data + assert isinstance(data, lt.blob.LocalBlobData) + id = data.id + assert lt.blob.blob_data_manager.get_blob(id) is data + with pytest.raises(ValueError): + lt.blob.blob_data_manager.add_blob(data) + del data + del blob + # Check that the blob doesn't linger due to references + with pytest.raises(KeyError): + lt.blob.blob_data_manager.get_blob(id) + + def test_blob_serialisation(): """Check that blobs may be serialised.""" blob = TextBlob.from_bytes(b"Some data") - # Can't serialise a blob (with data) without a BlobDataManager + # Can't serialise a blob (with data) without url_for in the context with pytest.raises(PydanticSerializationError): TypeAdapter(TextBlob).dump_python(blob) # Fake the required context variable, and it should work @@ -215,6 +266,14 @@ def test_blob_serialisation(): assert data["href"].startswith("urlfor://download_blob/?blob_id=") assert data["media_type"] == "text/plain" + vagueblob = VagueTextBlob.from_bytes(b"Some data") + # VagueTextBlob has a customised description that should be included + with use_dummy_url_for(): + data = TypeAdapter(VagueTextBlob).dump_python(vagueblob) + assert data["href"].startswith("urlfor://download_blob/?blob_id=") + assert data["media_type"] == "text/*" + assert data["description"] == "This URL will download some vague text data." + # Blobs that already refer to a remote URL should serialise without error. 
remoteblob = TextBlob.from_url( href="https://example/", @@ -224,6 +283,36 @@ def test_blob_serialisation(): assert data["media_type"] == "text/plain" +def test_blob_download(): + """Check that blob downloading works as expected.""" + # We use a bare FastAPI app to do an isolated test + app = fastapi.FastAPI() + # # This is needed to generate download URLs + # app.middleware("http")(url_for.url_for_middleware) + + # @app.get("/blob_json/") + # def blob_json() -> TextBlob: + # return TextBlob.from_bytes(b"Blob JSON!") + + @app.get("/download_blob/") + def download_blob(): + blob = TextBlob.from_bytes(b"Download me!") + return blob.response() + + with TestClient(app) as client: + response = client.get("/download_blob/") + assert response.status_code == 200 + assert response.content == b"Download me!" + assert response.headers["content-type"].startswith("text/plain") + + # Remote blobs can't be downloaded yet. There's no need to do this + # within a server, as the error is the same either way (and it's + # much easier to catch here). + remote = TextBlob.from_url(href="https://example.com/remote_blob") + with pytest.raises(NotImplementedError): + _ = remote.response() + + def test_blob_output_client(client): """Test that blob outputs work as expected when used over HTTP.""" tc = lt.ThingClient.from_url("/thing_one/", client=client) From d7eb18c2f57a9688e2ab7472a3d589a0db0e5514 Mon Sep 17 00:00:00 2001 From: Richard Bowman Date: Wed, 14 Jan 2026 15:55:25 +0000 Subject: [PATCH 11/11] Improve module level docstring This adds a sequence of events for blob serialisation, --- src/labthings_fastapi/outputs/blob.py | 45 ++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/labthings_fastapi/outputs/blob.py b/src/labthings_fastapi/outputs/blob.py index c53bf8f..4f117db 100644 --- a/src/labthings_fastapi/outputs/blob.py +++ b/src/labthings_fastapi/outputs/blob.py @@ -1,4 +1,4 @@ -"""BLOB Output Module. +r"""BLOB Output Module. 
The ``.Blob`` class is used when you need to return something file-like that can't easily (or efficiently) be converted to JSON. This is useful for returning large objects @@ -36,6 +36,49 @@ def get_image(self) -> MyImageBlob: action outputs may be retrieved multiple times after the action has completed, possibly concurrently. Creating a temp folder and making a file inside it with `.Blob.from_temporary_directory` is the safest way to deal with this. + +**Serialisation** + +`.Blob` objects are serialised to a JSON representation that includes a download +``href``\ . This is generated using `.middleware.url_for`, which uses a context +variable to pass the function that generates URLs to the serialiser code. That +context variable is available in every response handler function in the FastAPI +app - but it is not, in general, available in action or property code (because +actions and properties run their code in separate threads). The sequence of events +that leads to a `.Blob` being downloaded as a result of an action is roughly: + +* A `POST` request invokes the action. + * `.middleware.url_for.url_for_middleware` makes `url_for` accessible via + a context variable. + * A `201` response is returned that includes an ``href`` to poll the action. + * Action code is run in a separate thread (without `url_for` in the context): + * The action creates a `.Blob` object. + * The function that creates the `.Blob` object also creates a `.BlobData` + object as a property of the `.Blob`. + * The `.BlobData` object's constructor adds it to the ``blob_data_manager`` and + sets its ``id`` property accordingly. + * The `.Blob` is returned by the action. + * The output value of the action is stored in the `.Invocation` thread. +* A `GET` request polls the action. Once it has completed: + * `.middleware.url_for.url_for_middleware` makes `url_for` accessible via + a context variable. + * The `.Invocation` model is returned, which includes the `.Blob` in the + ``output`` field.
+ * FastAPI serialises the invocation model, which in turn serialises the `.Blob` + and uses ``url_for`` to generate a valid download ``href`` including the ``id`` + of the `.BlobData` object. +* A further `GET` request actually downloads the `.Blob`\ . + +This slightly complicated sequence ensures that we only ever send URLs back to the +client using `url_for` from the current `.fastapi.Request` object. That means the +URL used should be consistent with the URL of the request - so if an action is +started by a client using one IP address or DNS name, and polled by a different +client, each client will get a download ``href`` that matches the address they are +already using. + +In the future, it may be possible to respond directly with the `.Blob` data to +the original `POST` request; however, this only works for quick actions, so for now +we use the sequence above, which will work for both quick and slow actions. """ from __future__ import annotations