Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path

PROJECT_ROOT = Path(__file__).parent.parent
BASE_URL = "http://localhost:8000"
14 changes: 14 additions & 0 deletions src/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pathlib import Path
from uuid import UUID, uuid4

from fastapi import Request


def get_tmp_dir(request: Request) -> Path:
"""Get the temporary directory from the app state."""
return Path(request.app.state.temp_dir.name)


def get_token() -> UUID:
"""Generate a unique token for file storage."""
return uuid4()
10 changes: 10 additions & 0 deletions src/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .helpers import get_image_access
from .router import extractor_route
from .schemas import ProcessData, ProcessedDataAccess

__all__ = (
"extractor_route",
"get_image_access",
"ProcessData",
"ProcessedDataAccess",
)
21 changes: 21 additions & 0 deletions src/extractors/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path
from uuid import UUID

from constants import BASE_URL

from .router import ROUTE
from .schemas import ImageAccess


def get_image_access(temp_dir: Path, token: UUID) -> ImageAccess:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def get_image_access(temp_dir: Path, token: UUID) -> ImageAccess:
def get_file_access(temp_dir: Path, token: UUID) -> ImageAccess:

since we are not only generating urls for images but also for other types of files (x3p etc.)

"""Generate a base URL for file retrieval and create the token directory.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Generate a base URL for file retrieval and create the token directory.
"""
Generate a base URL for file retrieval and create the token directory.

Creates a token-specific subdirectory within the temporary directory
and returns the base URL that can be used to retrieve files from that directory.
:param temp_dir: Temporary directory to store files.
:param token: Unique token identifying the directory.
:returns: Base URL string for file retrieval endpoints.
"""
(output_dir := temp_dir / str(token)).mkdir(parents=True, exist_ok=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still believe exist_ok should not be True here, since the token is supposed to be random

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on the test created that is not the case. I need to know why he chose to do it this way, inorder to switch it. because removing the exist_ok=True will raise an error if another directory of the same name is found. one thing is for sure supposed is not what it's actually doing.

Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you are right and the uuid4() is only called once, (assuming token: UUID = Depends(get_token) is only called once during lifetime of the app?) which means we have a single temp directory for processing al3d files.

I think there are some pro's and cons to this approach. However, the current approach is then clearly problematic when there are multiple files processed (by one or more users).

A better approach IMO is to use uuid4() for generating random tokens each time, or even better, use the file hash of the raw bytes to generate a unique directory name

return ImageAccess(output_dir, f"{BASE_URL}{ROUTE}/file/{token}")
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add the keyword names here?

ImageAccess(resource_path=..., access_url=...')

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return ImageAccess(output_dir, f"{BASE_URL}{ROUTE}/file/{token}")
return ImageAccess(output_dir, f"{BASE_URL}{ROUTE}/files/{token}")

Following REST API naming conventions for resources I think this should be plural, see also: https://google.aip.dev/131

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok then I need to change it everywhere, will do.

55 changes: 55 additions & 0 deletions src/extractors/router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from http import HTTPStatus
from pathlib import Path

from fastapi import APIRouter, Depends
from fastapi.exceptions import HTTPException
from fastapi.responses import FileResponse
from loguru import logger
from pydantic import UUID4

from dependencies import get_tmp_dir

from .schemas import FileName

ROUTE = "/extractor"
extractor_route = APIRouter(prefix=ROUTE, tags=[ROUTE])


@extractor_route.get(
path="/file/{token}/{file_name}",
summary="Giving an path returns a image.",
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
summary="Giving an path returns a image.",
summary="Return an image from a filepath.",

or something similar?

description="""
given some file path returns the image located at the path.
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the difference between summary and description? Maybe we can remove one of them?

""",
responses={
HTTPStatus.BAD_REQUEST: {"description": "Unsupported file type requested."},
HTTPStatus.BAD_REQUEST: {"description": "Invalid token."},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are overwriting the previous entry in the dictionary here

HTTPStatus.NOT_FOUND: {"description": "File not found"},
},
)
async def get_file(
token: UUID4,
file_name: FileName,
temp_dir: Path = Depends(get_tmp_dir),
) -> FileResponse:
"""
Get image from file path.
This endpoint retrieves an image/scan file from a temporary directory and returns it as a FileResponse.
:param token: Temporary directory token.
:param file_name: Name of the file to retrieve (must end with .png or .x3p).
:param temp_dir: Temporary directory to store temporary files.
:returns: FileResponse containing the requested image.
"""
if not next(temp_dir.glob(str(token)), None):
logger.error(f"Directory not found {temp_dir}/{token}")
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the forward slash in a string here instead of the overloaded Path operator (/) mixes up Windows and Linux path formats on Windows machines

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch

raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=f"Expired or invalid token: {token}")
if not (file_path := temp_dir / str(token) / file_name).exists():
logger.error(f"File not found in temp dir: {file_path}")
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=f"File {file_path.name} not found.")

return FileResponse(
path=file_path,
media_type="image/png" if file_name.endswith(".png") else "application/octet-stream",
)
76 changes: 76 additions & 0 deletions src/extractors/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from __future__ import annotations

from enum import StrEnum, auto
from pathlib import Path
from typing import Annotated, NamedTuple, Protocol

from pydantic import AfterValidator, Field, HttpUrl

from models import BaseModelConfig, validate_file_extension


class SupportedExtension(StrEnum):
X3P = auto()
PNG = auto()


type FileName = Annotated[
str,
AfterValidator(lambda filename: validate_file_extension(filename, SupportedExtension)),
Field(
...,
description=f"Filename of type: {', '.join(SupportedExtension)}",
examples=["example.png", "scan.x3p"],
),
]


class ImageAccess(NamedTuple):
resource_path: Path
access_url: str


class ProcessData(Protocol):
@property
def surfacemap_filename(self) -> str: ...
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def surfacemap_filename(self) -> str: ...
def surface_map_filename(self) -> str: ...

I see now that these words are inconsistently separated in the codebase.. However, I do believe they should be written as two separate words

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

surface map is two words I will search for all one word instances and adjust them accordingly.


@property
def preview_filename(self) -> str: ...

@property
def x3p_filename(self) -> str: ...


class ProcessedDataAccess(BaseModelConfig):
x3p_image: HttpUrl = Field(
...,
description="converted subsampled X3P image.",
examples=["http://localhost:8000/preprocessor/file/surface_comparator_859lquto/scan.x3p"],
)
preview_image: HttpUrl = Field(
...,
description="rgba image made from the x3p converted file.",
Copy link
Collaborator

@cfs-data cfs-data Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
description="rgba image made from the x3p converted file.",
description="rgba image rendered from the parsed scan data.",

We do not parse the x3p file in the pipeline.

(same comment for the example below)

examples=["http://localhost:8000/preprocessor/file/surface_comparator_859lquto/preview.png"],
)
surfacemap_image: HttpUrl = Field(
...,
description="surface image made from the x3p converted file.",
examples=["http://localhost:8000/preprocessor/file/surface_comparator_859lquto/surface_map.png"],
)

@classmethod
def from_access_point(cls, access_url: str, process_data: ProcessData) -> ProcessedDataAccess:
"""Create ProcessedDataAccess from access URL and process data.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Create ProcessedDataAccess from access URL and process data.
"""
Create ProcessedDataAccess from access URL and process data.

Constructs a ProcessedDataAccess instance with complete URLs for all processed files
by combining the base access URL with filenames from the process data.
:param access_url: Base URL for file retrieval endpoints.
:param process_data: ProcessData protocol containing file naming information.
:returns: ProcessedDataAccess with complete URLs for x3p, preview, and surfacemap files.
"""
return ProcessedDataAccess(
x3p_image=HttpUrl(f"{access_url}/{process_data.x3p_filename}"),
preview_image=HttpUrl(f"{access_url}/{process_data.preview_filename}"),
surfacemap_image=HttpUrl(f"{access_url}/{process_data.surfacemap_filename}"),
)
30 changes: 29 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,45 @@
import tempfile
from contextlib import asynccontextmanager

from fastapi import APIRouter, FastAPI
from loguru import logger
from uvicorn import run

from comparators.router import comparison_router
from extractors.router import extractor_route
from preprocessors import preprocessor_route
from processors.router import processors

app = FastAPI()

@asynccontextmanager
async def lifespan(app: FastAPI):
"""
Create and clean up a temporary directory for the application lifespan.
This context manager sets up a temporary directory when the FastAPI application
starts and ensures it is cleaned up when the application shuts down.
:param app: The FastAPI application instance.
:yields: ``None``
"""
temp_dir = tempfile.TemporaryDirectory(prefix="surface_comparator_")
app.state.temp_dir = temp_dir
logger.info(f"Temporary directory created at: {temp_dir.name}")
try:
yield
finally:
logger.info(f"Cleaning up temporary directory: {temp_dir.name}")
temp_dir.cleanup()


app = FastAPI(lifespan=lifespan, title="Scratch API", version="0.1.0")
prefix_router = APIRouter()

prefix_router.include_router(preprocessor_route)
prefix_router.include_router(processors)
prefix_router.include_router(comparison_router)
prefix_router.include_router(extractor_route)
app.include_router(prefix_router)


Expand Down
9 changes: 9 additions & 0 deletions src/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import StrEnum

from pydantic import BaseModel, ConfigDict


Expand All @@ -7,3 +9,10 @@ class BaseModelConfig(BaseModel):
regex_engine="rust-regex",
extra="forbid",
)


def validate_file_extension(file_name: str, extensions: type[StrEnum]) -> str:
"""Validate that the file has a supported extension."""
if not file_name.endswith(tuple(extensions)):
raise ValueError(f"unsupported file type: {file_name}, try: {', '.join(extensions)}")
return file_name
7 changes: 1 addition & 6 deletions src/preprocessors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
from .router import preprocessor_route
from .schemas import ProcessedDataLocation, UploadScan

__all__ = (
"preprocessor_route",
"UploadScan",
"ProcessedDataLocation",
)
__all__ = ("preprocessor_route",)
37 changes: 25 additions & 12 deletions src/preprocessors/router.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from http import HTTPStatus
from pathlib import Path
from uuid import UUID

from fastapi import APIRouter
from fastapi import APIRouter, Depends
from loguru import logger

from dependencies import get_tmp_dir, get_token
from extractors import ProcessedDataAccess, get_image_access

from .pipelines import parse_scan_pipeline, preview_pipeline, surface_map_pipeline, x3p_pipeline
from .schemas import ProcessedDataLocation, UploadScan
from .schemas import UploadScan

preprocessor_route = APIRouter(
prefix="/preprocessor",
tags=["preprocessor"],
)
ROUTE = "/preprocessor"
preprocessor_route = APIRouter(prefix=ROUTE, tags=[ROUTE])


@preprocessor_route.get(
Expand Down Expand Up @@ -39,17 +43,26 @@ async def comparison_root() -> dict[str, str]:
HTTPStatus.INTERNAL_SERVER_ERROR: {"description": "image generation error"},
},
)
async def process_scan(upload_scan: UploadScan) -> ProcessedDataLocation:
async def process_scan(
upload_scan: UploadScan,
temp_dir: Path = Depends(get_tmp_dir),
token: UUID = Depends(get_token),
) -> ProcessedDataAccess:
"""
Process an uploaded scan file and generate derived output files.
This endpoint parses and validates the incoming scan file, performs the
necessary processing steps, and produces several outputs such as an X3P
file, a preview image, and a surface map saved to the output directory.
file, a preview image, and a surface map saved to an temp directiory and returns urls to retrieve them.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
file, a preview image, and a surface map saved to an temp directiory and returns urls to retrieve them.
file, a preview image, and a surface map saved to a temp directiory and returns urls to retrieve them.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
file, a preview image, and a surface map saved to an temp directiory and returns urls to retrieve them.
file, a preview image, and a surface map saved to an temp directory and returns urls to retrieve them.

"""
image_access = get_image_access(temp_dir, token)

parsed_scan = parse_scan_pipeline(upload_scan.scan_file, upload_scan.parameters)
return ProcessedDataLocation(
x3p_image=x3p_pipeline(parsed_scan, upload_scan.x3p_path),
surfacemap_image=surface_map_pipeline(parsed_scan, upload_scan.surfacemap_path, upload_scan.parameters),
preview_image=preview_pipeline(parsed_scan, upload_scan.preview_path),
x3p_pipeline(parsed_scan, image_access.resource_path / upload_scan.x3p_filename)
surface_map_pipeline(
parsed_scan, image_access.resource_path / upload_scan.surfacemap_filename, upload_scan.parameters
)
preview_pipeline(parsed_scan, image_access.resource_path / upload_scan.preview_filename)

logger.info(f"Generated files saved to {temp_dir}")
return ProcessedDataAccess.from_access_point(image_access.access_url, upload_scan)
Loading