diff --git a/registration/include/pcl/registration/impl/ndt.hpp b/registration/include/pcl/registration/impl/ndt.hpp index f831c3f45a4..c2ddb0ba41f 100644 --- a/registration/include/pcl/registration/impl/ndt.hpp +++ b/registration/include/pcl/registration/impl/ndt.hpp @@ -43,6 +43,28 @@ namespace pcl { +template +void +NormalDistributionsTransform::setNumberOfThreads( + unsigned int nr_threads) +{ +#ifdef _OPENMP + if (nr_threads == 0) + threads_ = omp_get_num_procs(); + else + threads_ = nr_threads; + PCL_DEBUG("[pcl::NormalDistributionsTransform::setNumberOfThreads] Setting " + "number of threads to %u.\n", + threads_); +#else + threads_ = 1; + if (nr_threads != 1) + PCL_WARN("[pcl::NormalDistributionsTransform::setNumberOfThreads] " + "Parallelization is requested, but OpenMP is not available! Continuing " + "without parallelization.\n"); +#endif // _OPENMP +} + template NormalDistributionsTransform:: NormalDistributionsTransform() @@ -196,25 +218,43 @@ NormalDistributionsTransform::computeDerivativ // Precompute Angular Derivatives (eq. 6.19 and 6.21)[Magnusson 2009] computeAngleDerivatives(transform); + std::vector neighborhood; + std::vector distances; + +#pragma omp parallel for num_threads(threads_) schedule(dynamic, 32) \ + shared(score, score_gradient, hessian) firstprivate(neighborhood, distances) // Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009] - for (std::size_t idx = 0; idx < input_->size(); idx++) { + for (std::ptrdiff_t idx = 0; idx < static_cast(input_->size()); + idx++) { // Transformed Point const auto& x_trans_pt = trans_cloud[idx]; - // Find neighbors (Radius search has been experimentally faster than direct neighbor - // checking. - std::vector neighborhood; - std::vector distances; - target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances); + // Find neighbors with different search method + neighborhood.clear(); + switch (search_method_) { + case NeighborSearchMethod::KDTREE: + // Radius search has been experimentally faster than direct neighbor checking. + target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances); + break; + case NeighborSearchMethod::DIRECT26: + target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood); + break; + case NeighborSearchMethod::DIRECT7: + target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood); + break; + case NeighborSearchMethod::DIRECT1: + target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood); + break; + } - for (const auto& cell : neighborhood) { - // Original Point - const auto& x_pt = (*input_)[idx]; - const Eigen::Vector3d x = x_pt.getVector3fMap().template cast(); + // Original Point + const Eigen::Vector3d x = (*input_)[idx].getVector3fMap().template cast(); + const Eigen::Vector3d x_trans_position = + x_trans_pt.getVector3fMap().template cast(); + for (const auto& cell : neighborhood) { // Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009] - const Eigen::Vector3d x_trans = - x_trans_pt.getVector3fMap().template cast() - cell->getMean(); + const Eigen::Vector3d x_trans = x_trans_position - cell->getMean(); // Inverse Covariance of Occupied Voxel // Uses precomputed covariance for speed. const Eigen::Matrix3d c_inv = cell->getInverseCov(); diff --git a/registration/include/pcl/registration/ndt.h b/registration/include/pcl/registration/ndt.h index 26115717bd7..e54b06df947 100644 --- a/registration/include/pcl/registration/ndt.h +++ b/registration/include/pcl/registration/ndt.h @@ -49,6 +49,14 @@ #include namespace pcl { + +enum class NeighborSearchMethod { + KDTREE, // Original + DIRECT26, // VoxelGridCovariance::getNeighborhoodAtPoint + DIRECT7, // VoxelGridCovariance::getFaceNeighborsAtPoint + DIRECT1 // VoxelGridCovariance::getVoxelAtPoint +}; + /** \brief A 3D Normal Distribution Transform registration implementation for * point cloud data. * \note For more information please see Magnusson, M. (2009). The @@ -171,6 +179,22 @@ class NormalDistributionsTransform step_size_ = step_size; } + /** \brief Initialize the scheduler and set the number of threads to use. + * \param[in] nr_threads the number of hardware threads to use (0 sets the value back + * to automatic) + */ + inline void + setNumberOfThreads(unsigned int nr_threads = 0); + + /** \brief Set neighborhood search method. + * \param[in] method neighborhood search method + */ + inline void + setNeighborhoodSearchMethod(const NeighborSearchMethod& method) + { + search_method_ = method; + } + /** \brief Get the point cloud outlier ratio. * \return outlier ratio */ @@ -630,6 +654,13 @@ class NormalDistributionsTransform * 2009]. */ Eigen::Matrix point_hessian_; +private: + /** \brief The number of threads the scheduler should use. */ + unsigned int threads_{1}; + + /** \brief The method used for nearest neighbor search. */ + NeighborSearchMethod search_method_{NeighborSearchMethod::KDTREE}; + public: PCL_MAKE_ALIGNED_OPERATOR_NEW }; diff --git a/test/registration/test_ndt.cpp b/test/registration/test_ndt.cpp index 91276c04f9e..b26aa71546a 100644 --- a/test/registration/test_ndt.cpp +++ b/test/registration/test_ndt.cpp @@ -60,6 +60,8 @@ TEST (PCL, NormalDistributionsTransform) PointCloud output; NormalDistributionsTransform reg; + reg.setNeighborhoodSearchMethod(NeighborSearchMethod::KDTREE); + reg.setNumberOfThreads(1); reg.setStepSize (0.05); reg.setResolution (0.025f); reg.setInputSource (src);