From c4b8657d1966d0dfd81216f4f74e404f7596e0f5 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Fri, 5 Dec 2025 16:13:36 -0500 Subject: [PATCH 1/3] Use thrust::device_vector/host_vector to simplify RAII storage management --- genmetaballs/src/cuda/core/fmb.cu | 22 -------- genmetaballs/src/cuda/core/fmb.cuh | 83 ++++++++++-------------------- 2 files changed, 27 insertions(+), 78 deletions(-) diff --git a/genmetaballs/src/cuda/core/fmb.cu b/genmetaballs/src/cuda/core/fmb.cu index c391fb1..ec680e5 100644 --- a/genmetaballs/src/cuda/core/fmb.cu +++ b/genmetaballs/src/cuda/core/fmb.cu @@ -15,25 +15,3 @@ CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const { const auto shifted_vec = vec - get_mean(); return dot(shifted_vec, cov_inv_apply(shifted_vec)); } - -template <> -__host__ FMBScene::FMBScene(size_t size) - : fmbs_{new FMB[size]}, log_weights_{new float[size]}, size_{size} {} - -template <> -__host__ FMBScene::FMBScene(size_t size) : size_{size} { - CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB))); - CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float))); -} - -template <> -__host__ FMBScene::~FMBScene() { - delete[] fmbs_; - delete[] log_weights_; -} - -template <> -__host__ FMBScene::~FMBScene() { - CUDA_CHECK(cudaFree(fmbs_)); - CUDA_CHECK(cudaFree(log_weights_)); -} diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index 021fdcc..c9db195 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -2,7 +2,12 @@ #include #include +#include #include +#include +#include +#include +#include #include "geometry.cuh" #include "utils.cuh" @@ -46,74 +51,40 @@ public: template class FMBScene { private: - FMB* fmbs_; - float* log_weights_; + // Host memory -> thrust::host_vector + // Device memory -> thrust::device_vector + template + using vector_t = std::conditional_t, + thrust::device_vector>; + + vector_t fmbs_; + vector_t log_weights_; size_t size_; public: - __host__ FMBScene(size_t size); + __host__ FMBScene(size_t size) : size_{size}, fmbs_(size), log_weights_(size) {}; - __host__ ~FMBScene(); - - CUDA_CALLABLE cuda::std::tuple operator[](const uint32_t i) { - return cuda::std::tie(fmbs_[i], log_weights_[i]); } - CUDA_CALLABLE cuda::std::tuple operator[](const uint32_t i) const { - return cuda::std::tie(fmbs_[i], log_weights_[i]); + CUDA_CALLABLE auto operator[](const uint32_t i) { + return cuda::std::make_tuple(fmbs_[i], log_weights_[i]); } - class Iterator { - private: - FMB* fmb_ptr_; - float* log_weight_ptr_; - - public: - CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr) - : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} - CUDA_CALLABLE cuda::std::tuple operator*() { - return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); - } - CUDA_CALLABLE bool operator!=(const Iterator& other) const { - return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; - } - CUDA_CALLABLE Iterator& operator++() { - fmb_ptr_++, log_weight_ptr_++; - return *this; - } - }; - - class ConstIterator { - private: - const FMB* fmb_ptr_; - const float* log_weight_ptr_; - - public: - CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr) - : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} - CUDA_CALLABLE cuda::std::tuple operator*() const { - return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); - } - CUDA_CALLABLE bool operator!=(const ConstIterator& other) const { - return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; - } - CUDA_CALLABLE ConstIterator& operator++() { - fmb_ptr_++, log_weight_ptr_++; - return *this; - } - }; + CUDA_CALLABLE auto operator[](const uint32_t i) const { + return cuda::std::make_tuple(fmbs_[i], log_weights_[i]); + } - CUDA_CALLABLE Iterator begin() { - return Iterator(fmbs_, log_weights_); + CUDA_CALLABLE auto begin() { + return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); } - CUDA_CALLABLE Iterator end() { - return Iterator(fmbs_ + size_, log_weights_ + size_); + CUDA_CALLABLE auto end() { + return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); } - CUDA_CALLABLE ConstIterator begin() const { - return ConstIterator(fmbs_, log_weights_); + CUDA_CALLABLE auto begin() const { + return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); } - CUDA_CALLABLE ConstIterator end() const { - return ConstIterator(fmbs_ + size_, log_weights_ + size_); + CUDA_CALLABLE auto end() const { + return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); } CUDA_CALLABLE const FMB& get_fmb(uint32_t idx) const { return fmbs_[idx]; From 3e63cb86535a2d61bbe40fde2f3167c04c15471d Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Fri, 5 Dec 2025 16:16:46 -0500 Subject: [PATCH 2/3] Binding for constructing FMBScene with values from Python side --- genmetaballs/src/cuda/bindings.cu | 3 +++ genmetaballs/src/cuda/core/fmb.cuh | 9 +++++++++ genmetaballs/src/genmetaballs/core/__init__.py | 15 ++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index fa5c018..b6cdf3e 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -253,6 +253,9 @@ template void bind_fmb_scene(nb::module_& m, const char* name) { nb::class_>(m, name) .def(nb::init(), nb::arg("size")) + .def(nb::init&, const std::vector&>(), nb::arg("fmbs"), + nb::arg("log_weights"), + "Construct FMBScene from a list of FMBs and corresponding log weights") .def_prop_ro("size", &FMBScene::size) .def("__len__", &FMBScene::size) .def("__getitem__", &FMBScene::get_fmb, nb::arg("idx"), diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index c9db195..70abb9c 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -64,6 +64,15 @@ private: public: __host__ FMBScene(size_t size) : size_{size}, fmbs_(size), log_weights_(size) {}; + // Copy constructor from std::vector + // This enables easy construction from Python side + __host__ FMBScene(const std::vector& fmbs, const std::vector& log_weights) + : size_{fmbs.size()}, fmbs_(fmbs.begin(), fmbs.end()), + log_weights_(log_weights.begin(), log_weights.end()) { + if (fmbs.size() != log_weights.size()) { + throw std::invalid_argument( + "FMBScene constructor: fmbs and log_weights must have the same size"); + } } CUDA_CALLABLE auto operator[](const uint32_t i) { diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py index 530f526..f966b41 100644 --- a/genmetaballs/src/genmetaballs/core/__init__.py +++ b/genmetaballs/src/genmetaballs/core/__init__.py @@ -10,7 +10,7 @@ TwoParameterConfidence, ZeroParameterConfidence, ) -from genmetaballs._genmetaballs_bindings.fmb import CPUFMBScene, GPUFMBScene +from genmetaballs._genmetaballs_bindings.fmb import FMB, CPUFMBScene, GPUFMBScene from genmetaballs._genmetaballs_bindings.image import CPUImage, GPUImage from genmetaballs._genmetaballs_bindings.utils import CPUFloatArray2D, GPUFloatArray2D, sigmoid @@ -63,6 +63,19 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene: raise ValueError(f"Unsupported device type: {device}") +# TODO: create a wrapper class for FMBScene and turn the factory functions into +# class methods +def fmb_scene_from_values( + fmbs: list[fmb.FMB], log_weights: list[float], device: DeviceType +) -> CPUFMBScene | GPUFMBScene: + if device == "cpu": + return CPUFMBScene(fmbs, log_weights) + elif device == "gpu": + return GPUFMBScene(fmbs, log_weights) + else: + raise ValueError(f"Unsupported device type: {device}") + + __all__ = [ "array2d_float", "ZeroParameterConfidence", From c6abbabf1ff760eed5de3dc4c98a60112e2a382e Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Fri, 5 Dec 2025 16:16:47 -0500 Subject: [PATCH 3/3] Unit test for constructing FMBScene from value --- genmetaballs/src/cuda/bindings.cu | 13 ++++++-- .../src/genmetaballs/core/__init__.py | 5 ++- tests/python_tests/test_fmb.py | 32 ++++++++++++++++++- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index b6cdf3e..e47698c 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -258,8 +258,17 @@ void bind_fmb_scene(nb::module_& m, const char* name) { "Construct FMBScene from a list of FMBs and corresponding log weights") .def_prop_ro("size", &FMBScene::size) .def("__len__", &FMBScene::size) - .def("__getitem__", &FMBScene::get_fmb, nb::arg("idx"), - "Get the (FMB, log_weight) tuple at index i") + .def( + "__getitem__", + // Convert cuda::std::tuple to std::tuple for nanobind + [](const FMBScene& scene, size_t idx) { + const auto& [fmb, log_weight] = scene[idx]; + // for device data, the types would be thrust::device_reference, which cannot be + // returned directly to Python. The static cast forces a copy (to host) to be made. + return std::make_tuple(static_cast(fmb), + static_cast(log_weight)); + }, + "Get the (FMB, log_weight) tuple at index i") .def("__repr__", [=](const FMBScene& scene) { return nb::str("{}(size={})").format(name, scene.size()); }); diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py index f966b41..3b442e9 100644 --- a/genmetaballs/src/genmetaballs/core/__init__.py +++ b/genmetaballs/src/genmetaballs/core/__init__.py @@ -65,7 +65,7 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene: # TODO: create a wrapper class for FMBScene and turn the factory functions into # class methods -def fmb_scene_from_values( +def make_fmb_scene_from_values( fmbs: list[fmb.FMB], log_weights: list[float], device: DeviceType ) -> CPUFMBScene | GPUFMBScene: if device == "cpu": @@ -87,7 +87,10 @@ def fmb_scene_from_values( "intersector", "sigmoid", "FourParameterBlender", + "FMB", + "Intrinsics", "ThreeParameterBlender", "make_image", "make_fmb_scene", + "make_fmb_scene_from_values", ] diff --git a/tests/python_tests/test_fmb.py b/tests/python_tests/test_fmb.py index 7044234..981fbec 100644 --- a/tests/python_tests/test_fmb.py +++ b/tests/python_tests/test_fmb.py @@ -3,7 +3,7 @@ from scipy.spatial.distance import mahalanobis from scipy.spatial.transform import Rotation as Rot -from genmetaballs.core import fmb, geometry, make_fmb_scene +from genmetaballs.core import fmb, geometry, make_fmb_scene, make_fmb_scene_from_values FMB = fmb.FMB Pose, Vec3D, Rotation = geometry.Pose, geometry.Vec3D, geometry.Rotation @@ -48,3 +48,33 @@ def test_fmb_scene_creation(): gpu_scene = make_fmb_scene(20, device="gpu") assert isinstance(gpu_scene, fmb.GPUFMBScene) assert len(gpu_scene) == 20 + + +@pytest.mark.parametrize("device", ["cpu", "gpu"]) +def test_fmb_scene_creation_from_lists(rng, device): + fmbs = [] + log_weights = [] + gt_translations = [] + gt_extents = [] + num_balls = 15 + for _ in range(num_balls): + quat = rng.uniform(size=4).astype(np.float32) + tran, extent = rng.uniform(size=(2, 3)).astype(np.float32) + pose = Pose.from_components(Rotation.from_quat(*quat), Vec3D(*tran)) + fmbs.append(FMB(pose, *extent)) + log_weights.append(rng.uniform()) + gt_translations.append(tran) + gt_extents.append(extent) + + scene = make_fmb_scene_from_values(fmbs, log_weights, device=device) + + assert len(scene) == num_balls + # Verify that we can retrieve each FMB and log weight correctly + for i in range(num_balls): + fmb_i, log_weight = scene[i] + translation = fmb_i.pose.tran + assert np.allclose([translation.x, translation.y, translation.z], gt_translations[i]) + + fmb_extent = fmb_i.extent + assert np.allclose(fmb_extent, gt_extents[i]) + assert np.isclose(log_weight, log_weights[i])