From a1ab5856fed3f1891e9318b096deb0e85f074a3e Mon Sep 17 00:00:00 2001 From: Nical <2731935339@qq.com> Date: Wed, 10 Dec 2025 13:53:43 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20feat:=20Support=20parallel=20co?= =?UTF-8?q?mputation=20for=20NDT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support parallel computation for NDT by OpenMP with different neighbor search method --- .../include/pcl/registration/impl/ndt.hpp | 60 +++++++++++++++---- registration/include/pcl/registration/ndt.h | 31 ++++++++++ test/registration/test_ndt.cpp | 2 + 3 files changed, 82 insertions(+), 11 deletions(-) diff --git a/registration/include/pcl/registration/impl/ndt.hpp b/registration/include/pcl/registration/impl/ndt.hpp index f831c3f45a4..79bf0052a85 100644 --- a/registration/include/pcl/registration/impl/ndt.hpp +++ b/registration/include/pcl/registration/impl/ndt.hpp @@ -43,6 +43,28 @@ namespace pcl { +template +void +NormalDistributionsTransform::setNumberOfThreads( + unsigned int nr_threads) +{ +#ifdef _OPENMP + if (nr_threads == 0) + threads_ = omp_get_num_procs(); + else + threads_ = nr_threads; + PCL_DEBUG("[pcl::NormalDistributionsTransform::setNumberOfThreads] Setting " + "number of threads to %u.\n", + threads_); +#else + threads_ = 1; + if (nr_threads != 1) + PCL_WARN("[pcl::NormalDistributionsTransform::setNumberOfThreads] " + "Parallelization is requested, but OpenMP is not available! Continuing " + "without parallelization.\n"); +#endif // _OPENMP +} + template NormalDistributionsTransform:: NormalDistributionsTransform() @@ -196,25 +218,41 @@ NormalDistributionsTransform::computeDerivativ // Precompute Angular Derivatives (eq. 6.19 and 6.21)[Magnusson 2009] computeAngleDerivatives(transform); + std::vector neighborhood; + std::vector distances; + +#pragma omp parallel for num_threads(threads_) schedule(dynamic, 32) \ + shared(score, score_gradient, hessian) firstprivate(neighborhood, distances) // Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009] for (std::size_t idx = 0; idx < input_->size(); idx++) { // Transformed Point const auto& x_trans_pt = trans_cloud[idx]; - // Find neighbors (Radius search has been experimentally faster than direct neighbor - // checking. - std::vector neighborhood; - std::vector distances; - target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances); + // Find neighbors with different search method + switch (search_method_) { + case KDTREE: + // Radius search has been experimentally faster than direct neighbor checking. + target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances); + break; + case DIRECT26: + target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood); + break; + case DIRECT7: + target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood); + break; + case DIRECT1: + target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood); + break; + } - for (const auto& cell : neighborhood) { - // Original Point - const auto& x_pt = (*input_)[idx]; - const Eigen::Vector3d x = x_pt.getVector3fMap().template cast(); + // Original Point + const Eigen::Vector3d x = (*input_)[idx].getVector3fMap().template cast(); + const Eigen::Vector3d x_trans_position = + x_trans_pt.getVector3fMap().template cast(); + for (const auto& cell : neighborhood) { // Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009] - const Eigen::Vector3d x_trans = - x_trans_pt.getVector3fMap().template cast() - cell->getMean(); + const Eigen::Vector3d x_trans = x_trans_position - cell->getMean(); // Inverse Covariance of Occupied Voxel // Uses precomputed covariance for speed. const Eigen::Matrix3d c_inv = cell->getInverseCov(); diff --git a/registration/include/pcl/registration/ndt.h b/registration/include/pcl/registration/ndt.h index 26115717bd7..db321b0914d 100644 --- a/registration/include/pcl/registration/ndt.h +++ b/registration/include/pcl/registration/ndt.h @@ -49,6 +49,14 @@ #include namespace pcl { + +enum NeighborSearchMethod { + KDTREE, // Original + DIRECT26, // VoxelGridCovariance::getNeighborhoodAtPoint + DIRECT7, // VoxelGridCovariance::getFaceNeighborsAtPoint + DIRECT1 // VoxelGridCovariance::getVoxelAtPoint +}; + /** \brief A 3D Normal Distribution Transform registration implementation for * point cloud data. * \note For more information please see Magnusson, M. (2009). The @@ -171,6 +179,22 @@ class NormalDistributionsTransform step_size_ = step_size; } + /** \brief Initialize the scheduler and set the number of threads to use. + * \param[in] nr_threads the number of hardware threads to use (0 sets the value back + * to automatic) + */ + inline void + setNumberOfThreads(unsigned int nr_threads = 0); + + /** \brief Set neighborhood search method. + * \param[in] method neighborhood serach method + */ + inline void + setNeighborhoodSearchMethod(const NeighborSearchMethod& method) + { + search_method_ = method; + } + /** \brief Get the point cloud outlier ratio. * \return outlier ratio */ @@ -630,6 +654,13 @@ class NormalDistributionsTransform * 2009]. */ Eigen::Matrix point_hessian_; +private: + /** \brief The number of threads the scheduler should use. */ + unsigned int threads_{1}; + + /** \brief The method used for nearest neighbor search. */ + NeighborSearchMethod search_method_{NeighborSearchMethod::KDTREE}; + public: PCL_MAKE_ALIGNED_OPERATOR_NEW }; diff --git a/test/registration/test_ndt.cpp b/test/registration/test_ndt.cpp index 91276c04f9e..b26aa71546a 100644 --- a/test/registration/test_ndt.cpp +++ b/test/registration/test_ndt.cpp @@ -60,6 +60,8 @@ TEST (PCL, NormalDistributionsTransform) PointCloud output; NormalDistributionsTransform reg; + reg.setNeighborhoodSearchMethod(NeighborSearchMethod::KDTREE); + reg.setNumberOfThreads(1); reg.setStepSize (0.05); reg.setResolution (0.025f); reg.setInputSource (src); From bb01879f19ac466ccfd8c6f97cfa53eb937a88d9 Mon Sep 17 00:00:00 2001 From: Nical <2731935339@qq.com> Date: Wed, 10 Dec 2025 16:02:34 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=90=9E=20fix:=20OpenMP=20Requires=20S?= =?UTF-8?q?igned=20Indices?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use signed std::ptrdiff_t instead of std::size_t --- registration/include/pcl/registration/impl/ndt.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/registration/include/pcl/registration/impl/ndt.hpp b/registration/include/pcl/registration/impl/ndt.hpp index 79bf0052a85..3b122a6ae29 100644 --- a/registration/include/pcl/registration/impl/ndt.hpp +++ b/registration/include/pcl/registration/impl/ndt.hpp @@ -224,7 +224,8 @@ NormalDistributionsTransform::computeDerivativ #pragma omp parallel for num_threads(threads_) schedule(dynamic, 32) \ shared(score, score_gradient, hessian) firstprivate(neighborhood, distances) // Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009] - for (std::size_t idx = 0; idx < input_->size(); idx++) { + for (std::ptrdiff_t idx = 0; idx < static_cast(input_->size()); + idx++) { // Transformed Point const auto& x_trans_pt = trans_cloud[idx]; From 6c0acc85e5a87d590aab07b675299f1099a04ada Mon Sep 17 00:00:00 2001 From: Nical <2731935339@qq.com> Date: Thu, 11 Dec 2025 15:15:06 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=90=9E=20fix:=20Apply=20copilot=20sug?= =?UTF-8?q?gestion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply copilot suggestion to fix some error --- registration/include/pcl/registration/impl/ndt.hpp | 9 +++++---- registration/include/pcl/registration/ndt.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/registration/include/pcl/registration/impl/ndt.hpp b/registration/include/pcl/registration/impl/ndt.hpp index 3b122a6ae29..c2ddb0ba41f 100644 --- a/registration/include/pcl/registration/impl/ndt.hpp +++ b/registration/include/pcl/registration/impl/ndt.hpp @@ -230,18 +230,19 @@ NormalDistributionsTransform::computeDerivativ const auto& x_trans_pt = trans_cloud[idx]; // Find neighbors with different search method + neighborhood.clear(); switch (search_method_) { - case KDTREE: + case NeighborSearchMethod::KDTREE: // Radius search has been experimentally faster than direct neighbor checking. target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances); break; - case DIRECT26: + case NeighborSearchMethod::DIRECT26: target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood); break; - case DIRECT7: + case NeighborSearchMethod::DIRECT7: target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood); break; - case DIRECT1: + case NeighborSearchMethod::DIRECT1: target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood); break; } diff --git a/registration/include/pcl/registration/ndt.h b/registration/include/pcl/registration/ndt.h index db321b0914d..e54b06df947 100644 --- a/registration/include/pcl/registration/ndt.h +++ b/registration/include/pcl/registration/ndt.h @@ -50,7 +50,7 @@ namespace pcl { -enum NeighborSearchMethod { +enum class NeighborSearchMethod { KDTREE, // Original DIRECT26, // VoxelGridCovariance::getNeighborhoodAtPoint DIRECT7, // VoxelGridCovariance::getFaceNeighborsAtPoint @@ -187,7 +187,7 @@ class NormalDistributionsTransform setNumberOfThreads(unsigned int nr_threads = 0); /** \brief Set neighborhood search method. - * \param[in] method neighborhood serach method + * \param[in] method neighborhood search method */ inline void setNeighborhoodSearchMethod(const NeighborSearchMethod& method)