From 704416a7fd0d24eca85908100bd5377a7b1c71d0 Mon Sep 17 00:00:00 2001 From: Kai Wang Date: Sun, 25 Jan 2026 20:52:20 +0800 Subject: [PATCH 1/4] [features] change SHOT estimation to a dynamic schedule --- benchmarks/CMakeLists.txt | 4 + benchmarks/features/shot.cpp | 169 ++++++++++++++++++ .../include/pcl/features/impl/shot_omp.hpp | 6 +- features/include/pcl/features/shot_omp.h | 25 ++- 4 files changed, 197 insertions(+), 7 deletions(-) create mode 100644 benchmarks/features/shot.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 5ef8f0eb3f4..d417cf8f195 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -18,6 +18,10 @@ PCL_ADD_BENCHMARK(features_normal_3d FILES features/normal_3d.cpp ARGUMENTS "${PCL_SOURCE_DIR}/test/table_scene_mug_stereo_textured.pcd" "${PCL_SOURCE_DIR}/test/milk_cartoon_all_small_clorox.pcd") +PCL_ADD_BENCHMARK(features_shot FILES features/shot.cpp + LINK_WITH pcl_io pcl_search pcl_features + ARGUMENTS "${PCL_SOURCE_DIR}/test/milk.pcd") + PCL_ADD_BENCHMARK(filters_voxel_grid FILES filters/voxel_grid.cpp LINK_WITH pcl_io pcl_filters ARGUMENTS "${PCL_SOURCE_DIR}/test/table_scene_mug_stereo_textured.pcd" diff --git a/benchmarks/features/shot.cpp b/benchmarks/features/shot.cpp new file mode 100644 index 00000000000..24fa91641cf --- /dev/null +++ b/benchmarks/features/shot.cpp @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include + +#include + +static void +BM_SHOT352(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::SHOTEstimation shot; + shot.setInputCloud(cloud); + shot.setInputNormals(normals); + shot.setRadiusSearch(0.04); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT352_OMP(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::SHOTEstimationOMP shot( + 0, state.range(0) // + ); + shot.setInputCloud(cloud); + shot.setInputNormals(normals); + shot.setRadiusSearch(0.04); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT1344(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud_xyz(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud_xyz); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_xyz); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); + + for (int i = 0; i < static_cast(cloud_xyz->size()); ++i) { + pcl::PointXYZRGBA p; + p.x = (*cloud_xyz)[i].x; + p.y = (*cloud_xyz)[i].y; + p.z = (*cloud_xyz)[i].z; + + p.rgba = ((i % 255) << 16) + (((255 - i) % 255) << 8) + ((i * 37) % 255); + cloud_rgba->push_back(p); + } + + pcl::SHOTColorEstimation shot(true, + true); + shot.setInputCloud(cloud_rgba); + shot.setInputNormals(normals); + shot.setRadiusSearch(0.04); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +static void +BM_SHOT1344_OMP(benchmark::State& state, const std::string& file) +{ + pcl::PointCloud::Ptr cloud_xyz(new pcl::PointCloud); + pcl::PCDReader reader; + reader.read(file, *cloud_xyz); + + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_xyz); + ne.setKSearch(10); + pcl::PointCloud::Ptr normals(new pcl::PointCloud); + ne.compute(*normals); + + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); + + for (int i = 0; i < static_cast(cloud_xyz->size()); ++i) { + pcl::PointXYZRGBA p; + p.x = (*cloud_xyz)[i].x; + p.y = (*cloud_xyz)[i].y; + p.z = (*cloud_xyz)[i].z; + + p.rgba = ((i % 255) << 16) + (((255 - i) % 255) << 8) + ((i * 37) % 255); + cloud_rgba->push_back(p); + } + + pcl::SHOTColorEstimationOMP shot( + true, true, 0, state.range(0)); + shot.setInputCloud(cloud_rgba); + shot.setInputNormals(normals); + shot.setRadiusSearch(0.04); + + pcl::PointCloud::Ptr output(new pcl::PointCloud); + for (auto _ : state) { + shot.compute(*output); + } +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + std::cerr << "No test files given. Please provide a PCD file paths for SHOT352 " + "and SHOT1344 benchmarks." + << std::endl; + return (-1); + } + + constexpr int runs = 100; + + benchmark::RegisterBenchmark("BM_SHOT352", &BM_SHOT352, argv[1]) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + benchmark::RegisterBenchmark("BM_SHOT352_OMP", &BM_SHOT352_OMP, argv[1]) + ->Arg(64) + ->Arg(128) + ->Arg(256) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + + benchmark::RegisterBenchmark("BM_SHOT1344", &BM_SHOT1344, argv[1]) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + benchmark::RegisterBenchmark("BM_SHOT1344_OMP", &BM_SHOT1344_OMP, argv[1]) + ->Arg(64) + ->Arg(128) + ->Arg(256) + ->Unit(benchmark::kMillisecond) + ->Iterations(runs); + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/features/include/pcl/features/impl/shot_omp.hpp b/features/include/pcl/features/impl/shot_omp.hpp index 4a0a01a1988..e5137a75bd8 100644 --- a/features/include/pcl/features/impl/shot_omp.hpp +++ b/features/include/pcl/features/impl/shot_omp.hpp @@ -153,7 +153,8 @@ pcl::SHOTEstimationOMP::computeFeature ( #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(threads_) \ + schedule(dynamic, chunk_size_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { @@ -240,7 +241,8 @@ pcl::SHOTColorEstimationOMP::computeFeat #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(threads_) \ + schedule(dynamic, chunk_size_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { Eigen::VectorXf shot; diff --git a/features/include/pcl/features/shot_omp.h b/features/include/pcl/features/shot_omp.h index 59d48eaac17..3e339270015 100644 --- a/features/include/pcl/features/shot_omp.h +++ b/features/include/pcl/features/shot_omp.h @@ -94,8 +94,12 @@ namespace pcl using PointCloudOut = typename Feature::PointCloudOut; using PointCloudIn = typename Feature::PointCloudIn; - /** \brief Empty constructor. */ - SHOTEstimationOMP (unsigned int nr_threads = 0) : SHOTEstimation () + /** \brief Empty constructor. + * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. + */ + explicit SHOTEstimationOMP (unsigned int nr_threads = 0, int chunk_size = 256) + : SHOTEstimation (), chunk_size_(chunk_size) { setNumberOfThreads(nr_threads); }; @@ -122,6 +126,9 @@ namespace pcl /** \brief The number of threads the scheduler should use. */ unsigned int threads_; + + /** \brief Chunk size for (dynamic) scheduling. */ + int chunk_size_; }; /** \brief SHOTColorEstimationOMP estimates the Signature of Histograms of OrienTations (SHOT) descriptor for a given point cloud dataset @@ -176,11 +183,16 @@ namespace pcl using PointCloudOut = typename Feature::PointCloudOut; using PointCloudIn = typename Feature::PointCloudIn; - /** \brief Empty constructor. */ + /** \brief Empty constructor. + * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. + */ SHOTColorEstimationOMP (bool describe_shape = true, bool describe_color = true, - unsigned int nr_threads = 0) - : SHOTColorEstimation (describe_shape, describe_color) + unsigned int nr_threads = 0, + int chunk_size = 64) + : SHOTColorEstimation (describe_shape, describe_color), + chunk_size_(chunk_size) { setNumberOfThreads(nr_threads); } @@ -207,6 +219,9 @@ namespace pcl /** \brief The number of threads the scheduler should use. */ unsigned int threads_; + + /** \brief Chunk size for (dynamic) scheduling. */ + int chunk_size_; }; } From f44d57620914015584b5e15a065100765091d995 Mon Sep 17 00:00:00 2001 From: Kai Wang Date: Thu, 29 Jan 2026 14:03:56 +0800 Subject: [PATCH 2/4] change test file, add uniform sampling --- benchmarks/CMakeLists.txt | 2 +- benchmarks/features/shot.cpp | 91 +++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index d417cf8f195..1934d21e243 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -20,7 +20,7 @@ PCL_ADD_BENCHMARK(features_normal_3d FILES features/normal_3d.cpp PCL_ADD_BENCHMARK(features_shot FILES features/shot.cpp LINK_WITH pcl_io pcl_search pcl_features - ARGUMENTS "${PCL_SOURCE_DIR}/test/milk.pcd") + ARGUMENTS "${PCL_SOURCE_DIR}/test/milk_cartoon_all_small_clorox.pcd") PCL_ADD_BENCHMARK(filters_voxel_grid FILES filters/voxel_grid.cpp LINK_WITH pcl_io pcl_filters diff --git a/benchmarks/features/shot.cpp b/benchmarks/features/shot.cpp index 24fa91641cf..fbfe905848c 100644 --- a/benchmarks/features/shot.cpp +++ b/benchmarks/features/shot.cpp @@ -1,11 +1,16 @@ #include #include +#include #include #include #include #include +constexpr float lrf_radius = 0.04f; +constexpr float shot_search_radius = 0.04f; +constexpr float uniform_sampling_radius = 0.03f; + static void BM_SHOT352(benchmark::State& state, const std::string& file) { @@ -19,10 +24,18 @@ BM_SHOT352(benchmark::State& state, const std::string& file) pcl::PointCloud::Ptr normals(new pcl::PointCloud); ne.compute(*normals); + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints(new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + pcl::SHOTEstimation shot; - shot.setInputCloud(cloud); + shot.setInputCloud(keypoints); shot.setInputNormals(normals); - shot.setRadiusSearch(0.04); + shot.setSearchSurface(cloud); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); pcl::PointCloud::Ptr output(new pcl::PointCloud); for (auto _ : state) { @@ -43,12 +56,20 @@ BM_SHOT352_OMP(benchmark::State& state, const std::string& file) pcl::PointCloud::Ptr normals(new pcl::PointCloud); ne.compute(*normals); + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints(new pcl::PointCloud); + uniform_sampling.filter(*keypoints); + pcl::SHOTEstimationOMP shot( 0, state.range(0) // ); - shot.setInputCloud(cloud); + shot.setInputCloud(keypoints); shot.setInputNormals(normals); - shot.setRadiusSearch(0.04); + shot.setSearchSurface(cloud); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); pcl::PointCloud::Ptr output(new pcl::PointCloud); for (auto _ : state) { @@ -59,34 +80,31 @@ BM_SHOT352_OMP(benchmark::State& state, const std::string& file) static void BM_SHOT1344(benchmark::State& state, const std::string& file) { - pcl::PointCloud::Ptr cloud_xyz(new pcl::PointCloud); + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); pcl::PCDReader reader; - reader.read(file, *cloud_xyz); + reader.read(file, *cloud_rgba); - pcl::NormalEstimationOMP ne; - ne.setInputCloud(cloud_xyz); + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_rgba); ne.setKSearch(10); pcl::PointCloud::Ptr normals(new pcl::PointCloud); ne.compute(*normals); - pcl::PointCloud::Ptr cloud_rgba( + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud_rgba); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints( new pcl::PointCloud); - - for (int i = 0; i < static_cast(cloud_xyz->size()); ++i) { - pcl::PointXYZRGBA p; - p.x = (*cloud_xyz)[i].x; - p.y = (*cloud_xyz)[i].y; - p.z = (*cloud_xyz)[i].z; - - p.rgba = ((i % 255) << 16) + (((255 - i) % 255) << 8) + ((i * 37) % 255); - cloud_rgba->push_back(p); - } + uniform_sampling.filter(*keypoints); pcl::SHOTColorEstimation shot(true, true); - shot.setInputCloud(cloud_rgba); + shot.setInputCloud(keypoints); shot.setInputNormals(normals); - shot.setRadiusSearch(0.04); + shot.setSearchSurface(cloud_rgba); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); pcl::PointCloud::Ptr output(new pcl::PointCloud); for (auto _ : state) { @@ -97,34 +115,31 @@ BM_SHOT1344(benchmark::State& state, const std::string& file) static void BM_SHOT1344_OMP(benchmark::State& state, const std::string& file) { - pcl::PointCloud::Ptr cloud_xyz(new pcl::PointCloud); + pcl::PointCloud::Ptr cloud_rgba( + new pcl::PointCloud); pcl::PCDReader reader; - reader.read(file, *cloud_xyz); + reader.read(file, *cloud_rgba); - pcl::NormalEstimationOMP ne; - ne.setInputCloud(cloud_xyz); + pcl::NormalEstimationOMP ne; + ne.setInputCloud(cloud_rgba); ne.setKSearch(10); pcl::PointCloud::Ptr normals(new pcl::PointCloud); ne.compute(*normals); - pcl::PointCloud::Ptr cloud_rgba( + pcl::UniformSampling uniform_sampling; + uniform_sampling.setInputCloud(cloud_rgba); + uniform_sampling.setRadiusSearch(uniform_sampling_radius); + pcl::PointCloud::Ptr keypoints( new pcl::PointCloud); - - for (int i = 0; i < static_cast(cloud_xyz->size()); ++i) { - pcl::PointXYZRGBA p; - p.x = (*cloud_xyz)[i].x; - p.y = (*cloud_xyz)[i].y; - p.z = (*cloud_xyz)[i].z; - - p.rgba = ((i % 255) << 16) + (((255 - i) % 255) << 8) + ((i * 37) % 255); - cloud_rgba->push_back(p); - } + uniform_sampling.filter(*keypoints); pcl::SHOTColorEstimationOMP shot( true, true, 0, state.range(0)); - shot.setInputCloud(cloud_rgba); + shot.setInputCloud(keypoints); shot.setInputNormals(normals); - shot.setRadiusSearch(0.04); + shot.setSearchSurface(cloud_rgba); + shot.setRadiusSearch(shot_search_radius); + shot.setLRFRadius(lrf_radius); pcl::PointCloud::Ptr output(new pcl::PointCloud); for (auto _ : state) { From abd79ff00f234ae3091c09f446ae197b3a78d1b9 Mon Sep 17 00:00:00 2001 From: Kai Wang <53281385+kai-waang@users.noreply.github.com> Date: Sun, 1 Feb 2026 13:40:07 +0800 Subject: [PATCH 3/4] Update benchmarks/features/shot.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- benchmarks/features/shot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/features/shot.cpp b/benchmarks/features/shot.cpp index fbfe905848c..cb7a01e9207 100644 --- a/benchmarks/features/shot.cpp +++ b/benchmarks/features/shot.cpp @@ -151,8 +151,8 @@ int main(int argc, char** argv) { if (argc < 2) { - std::cerr << "No test files given. Please provide a PCD file paths for SHOT352 " - "and SHOT1344 benchmarks." + std::cerr << "No test files given. Please provide a PCD file path to use for " + "both SHOT352 and SHOT1344 benchmarks." << std::endl; return (-1); } From 8e05351d25793b03d108443dd84c4deabbd88726 Mon Sep 17 00:00:00 2001 From: Kai Wang Date: Sun, 1 Feb 2026 23:18:46 +0800 Subject: [PATCH 4/4] remove explicit --- features/include/pcl/features/shot_omp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/features/include/pcl/features/shot_omp.h b/features/include/pcl/features/shot_omp.h index 3e339270015..50fefdf9a2c 100644 --- a/features/include/pcl/features/shot_omp.h +++ b/features/include/pcl/features/shot_omp.h @@ -98,7 +98,7 @@ namespace pcl * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. */ - explicit SHOTEstimationOMP (unsigned int nr_threads = 0, int chunk_size = 256) + SHOTEstimationOMP (unsigned int nr_threads = 0, int chunk_size = 256) : SHOTEstimation (), chunk_size_(chunk_size) { setNumberOfThreads(nr_threads);