Skip to content

Commit ab14bc2

Browse files
feat: implement PCA-based normal estimation with parallel processing and comprehensive benchmarks
This commit adds a complete PCA-based normal estimation algorithm for point clouds using KNN and Eigen for robust mathematical computations. ## Core Implementation - **PCA Normal Estimator**: Template-based design supporting different data types and KNN algorithms - **Parallel Processing**: Optional parallel execution using thread pool singleton with 5-11x speedup - **Robust PCA**: Uses Eigen's SelfAdjointEigenSolver for numerical stability - **Multiple KNN Support**: Works with KDTree, Brute Force, and Brute Force Parallel algorithms ## Key Features - **Accuracy**: Maintains <11 degrees error on planar surfaces - **Performance**: KDTree processes 10K points in ~21ms (sequential), ~3ms (parallel) - **Scalability**: Better parallel efficiency with larger point clouds (up to 11x speedup) - **Edge Case Handling**: Graceful fallback for insufficient neighbors or computation failures ## Files Added - `src/include/cpp-toolbox/pcl/norm/base_norm.hpp`: CRTP base class for normal extractors - `src/include/cpp-toolbox/pcl/norm/pca_norm.hpp`: PCA normal extractor interface - `src/include/cpp-toolbox/pcl/norm/impl/pca_norm_impl.hpp`: Complete PCA implementation with Eigen - `test/pcl/norm_test.cpp`: Comprehensive tests (668 assertions covering functionality, accuracy, edge cases) - `benchmark/pcl/norm_benc.cpp`: Detailed parallel vs sequential performance benchmarks ## Build System Updates - Added Eigen dependency to test target for matrix operations - Updated CMakeLists.txt files to include new norm tests and benchmarks - Fixed compilation issues with non-copyable KNN objects ## Code Style Improvements - Unified naming convention: `kParallelThreshold` → `k_parallel_threshold` - Reorganized includes in KNN headers for consistency ## Performance Results - **1K points**: 5.5x speedup (1.32ms → 0.24ms) - **5K points**: 6.6x speedup (9.35ms → 1.42ms) - **10K points**: 7.3x speedup (21.2ms → 2.92ms) - **25K points**: 11.1x speedup (65.9ms → 5.96ms) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 6251277 commit ab14bc2

File tree

11 files changed

+1105
-90
lines changed

11 files changed

+1105
-90
lines changed

benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ list(APPEND BENCHMARK_FILES
77
file/memory_mapped_file_benc.cpp
88
pcl/downsampling_benc.cpp
99
pcl/knn_benc.cpp
10+
pcl/norm_benc.cpp
1011
${CMAKE_SOURCE_DIR}/test/my_catch2_main.cpp
1112
)
1213

benchmark/pcl/norm_benc.cpp

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
#include <catch2/benchmark/catch_benchmark.hpp>
2+
#include <catch2/catch_test_macros.hpp>
3+
4+
#include <cpp-toolbox/pcl/norm/pca_norm.hpp>
5+
#include <cpp-toolbox/pcl/knn/kdtree.hpp>
6+
#include <cpp-toolbox/pcl/knn/bfknn.hpp>
7+
#include <cpp-toolbox/pcl/knn/bfknn_parallel.hpp>
8+
#include <cpp-toolbox/utils/random.hpp>
9+
#include <cpp-toolbox/utils/timer.hpp>
10+
11+
#include <iostream>
12+
#include <memory>
13+
#include <iomanip>
14+
#include <cmath>
15+
16+
using namespace toolbox::pcl;
17+
using namespace toolbox::types;
18+
19+
// Helper function to generate random point cloud
20+
template<typename T>
21+
point_cloud_t<T> generate_benchmark_cloud(std::size_t num_points, T min_val = -100, T max_val = 100)
22+
{
23+
point_cloud_t<T> cloud;
24+
cloud.points.reserve(num_points);
25+
26+
toolbox::utils::random_t rng;
27+
28+
for (std::size_t i = 0; i < num_points; ++i)
29+
{
30+
point_t<T> pt;
31+
pt.x = rng.random<T>(min_val, max_val);
32+
pt.y = rng.random<T>(min_val, max_val);
33+
pt.z = rng.random<T>(min_val, max_val);
34+
cloud.points.push_back(pt);
35+
}
36+
37+
return cloud;
38+
}
39+
40+
// Helper function to create a planar point cloud for more realistic benchmarking
41+
template<typename T>
42+
point_cloud_t<T> generate_planar_benchmark_cloud(std::size_t num_points, T extent = 50.0)
43+
{
44+
point_cloud_t<T> cloud;
45+
cloud.points.reserve(num_points);
46+
47+
toolbox::utils::random_t rng;
48+
49+
// Generate points on a plane with some noise
50+
for (std::size_t i = 0; i < num_points; ++i)
51+
{
52+
T x = rng.random<T>(-extent, extent);
53+
T y = rng.random<T>(-extent, extent);
54+
T z = rng.random<T>(-2.0, 2.0); // Small noise in z direction
55+
56+
cloud.points.emplace_back(x, y, z);
57+
}
58+
59+
return cloud;
60+
}
61+
62+
// Helper function to generate a spherical point cloud
63+
template<typename T>
64+
point_cloud_t<T> generate_spherical_benchmark_cloud(std::size_t num_points, T radius = 50.0)
65+
{
66+
point_cloud_t<T> cloud;
67+
cloud.points.reserve(num_points);
68+
69+
toolbox::utils::random_t rng;
70+
71+
for (std::size_t i = 0; i < num_points; ++i)
72+
{
73+
// Generate random point on sphere surface
74+
T theta = rng.random<T>(0, 2 * M_PI);
75+
T phi = rng.random<T>(0, M_PI);
76+
77+
T x = radius * std::sin(phi) * std::cos(theta);
78+
T y = radius * std::sin(phi) * std::sin(theta);
79+
T z = radius * std::cos(phi);
80+
81+
cloud.points.emplace_back(x, y, z);
82+
}
83+
84+
return cloud;
85+
}
86+
87+
template<typename DataType, typename KNN>
88+
void benchmark_norm_computation(const std::string& test_name,
89+
const point_cloud_t<DataType>& cloud,
90+
KNN& knn,
91+
std::size_t num_neighbors,
92+
bool enable_parallel)
93+
{
94+
pca_norm_extractor_t<DataType, KNN> norm_extractor;
95+
norm_extractor.set_input(cloud);
96+
norm_extractor.set_knn(knn);
97+
norm_extractor.set_num_neighbors(num_neighbors);
98+
norm_extractor.enable_parallel(enable_parallel);
99+
100+
std::string parallel_suffix = enable_parallel ? " (Parallel)" : " (Sequential)";
101+
102+
BENCHMARK(test_name + parallel_suffix)
103+
{
104+
return norm_extractor.extract();
105+
};
106+
}
107+
108+
TEST_CASE("PCA Normal Estimation Parallel vs Sequential Benchmarks", "[benchmark][pcl][norm]")
109+
{
110+
using data_type = float;
111+
constexpr std::size_t num_neighbors = 15;
112+
113+
SECTION("Small Point Cloud (1K points)")
114+
{
115+
auto cloud = generate_benchmark_cloud<data_type>(1000);
116+
INFO("Testing with " << cloud.size() << " points, " << num_neighbors << " neighbors");
117+
118+
auto kdtree1 = kdtree_t<data_type>{};
119+
auto kdtree2 = kdtree_t<data_type>{};
120+
121+
benchmark_norm_computation("KDTree Small Cloud", cloud, kdtree1, num_neighbors, false);
122+
benchmark_norm_computation("KDTree Small Cloud", cloud, kdtree2, num_neighbors, true);
123+
}
124+
125+
SECTION("Medium Point Cloud (5K points)")
126+
{
127+
auto cloud = generate_benchmark_cloud<data_type>(5000);
128+
INFO("Testing with " << cloud.size() << " points, " << num_neighbors << " neighbors");
129+
130+
auto kdtree1 = kdtree_t<data_type>{};
131+
auto kdtree2 = kdtree_t<data_type>{};
132+
133+
benchmark_norm_computation("KDTree Medium Cloud", cloud, kdtree1, num_neighbors, false);
134+
benchmark_norm_computation("KDTree Medium Cloud", cloud, kdtree2, num_neighbors, true);
135+
}
136+
137+
SECTION("Large Point Cloud (10K points)")
138+
{
139+
auto cloud = generate_benchmark_cloud<data_type>(10000);
140+
INFO("Testing with " << cloud.size() << " points, " << num_neighbors << " neighbors");
141+
142+
auto kdtree1 = kdtree_t<data_type>{};
143+
auto kdtree2 = kdtree_t<data_type>{};
144+
145+
benchmark_norm_computation("KDTree Large Cloud", cloud, kdtree1, num_neighbors, false);
146+
benchmark_norm_computation("KDTree Large Cloud", cloud, kdtree2, num_neighbors, true);
147+
}
148+
149+
SECTION("Very Large Point Cloud (25K points)")
150+
{
151+
auto cloud = generate_benchmark_cloud<data_type>(25000);
152+
INFO("Testing with " << cloud.size() << " points, " << num_neighbors << " neighbors");
153+
154+
auto kdtree1 = kdtree_t<data_type>{};
155+
auto kdtree2 = kdtree_t<data_type>{};
156+
157+
benchmark_norm_computation("KDTree Very Large Cloud", cloud, kdtree1, num_neighbors, false);
158+
benchmark_norm_computation("KDTree Very Large Cloud", cloud, kdtree2, num_neighbors, true);
159+
}
160+
}
161+
162+
TEST_CASE("PCA Normal Estimation - Different Point Cloud Types", "[benchmark][pcl][norm]")
163+
{
164+
using data_type = float;
165+
constexpr std::size_t num_neighbors = 12;
166+
constexpr std::size_t num_points = 8000;
167+
168+
SECTION("Random Point Cloud")
169+
{
170+
auto cloud = generate_benchmark_cloud<data_type>(num_points);
171+
INFO("Testing random cloud with " << cloud.size() << " points");
172+
173+
auto kdtree1 = kdtree_t<data_type>{};
174+
auto kdtree2 = kdtree_t<data_type>{};
175+
176+
benchmark_norm_computation("Random Cloud", cloud, kdtree1, num_neighbors, false);
177+
benchmark_norm_computation("Random Cloud", cloud, kdtree2, num_neighbors, true);
178+
}
179+
180+
SECTION("Planar Point Cloud")
181+
{
182+
auto cloud = generate_planar_benchmark_cloud<data_type>(num_points);
183+
INFO("Testing planar cloud with " << cloud.size() << " points");
184+
185+
auto kdtree1 = kdtree_t<data_type>{};
186+
auto kdtree2 = kdtree_t<data_type>{};
187+
188+
benchmark_norm_computation("Planar Cloud", cloud, kdtree1, num_neighbors, false);
189+
benchmark_norm_computation("Planar Cloud", cloud, kdtree2, num_neighbors, true);
190+
}
191+
192+
SECTION("Spherical Point Cloud")
193+
{
194+
auto cloud = generate_spherical_benchmark_cloud<data_type>(num_points);
195+
INFO("Testing spherical cloud with " << cloud.size() << " points");
196+
197+
auto kdtree1 = kdtree_t<data_type>{};
198+
auto kdtree2 = kdtree_t<data_type>{};
199+
200+
benchmark_norm_computation("Spherical Cloud", cloud, kdtree1, num_neighbors, false);
201+
benchmark_norm_computation("Spherical Cloud", cloud, kdtree2, num_neighbors, true);
202+
}
203+
}
204+
205+
TEST_CASE("PCA Normal Estimation - KNN Algorithm Comparison", "[benchmark][pcl][norm]")
206+
{
207+
using data_type = float;
208+
constexpr std::size_t num_neighbors = 10;
209+
constexpr std::size_t num_points = 5000;
210+
211+
auto cloud = generate_benchmark_cloud<data_type>(num_points);
212+
INFO("Comparing KNN algorithms with " << cloud.size() << " points");
213+
214+
SECTION("KDTree vs Brute Force - Sequential")
215+
{
216+
auto kdtree = kdtree_t<data_type>{};
217+
auto bfknn = bfknn_t<data_type>{};
218+
219+
benchmark_norm_computation("KDTree", cloud, kdtree, num_neighbors, false);
220+
benchmark_norm_computation("Brute Force", cloud, bfknn, num_neighbors, false);
221+
}
222+
223+
SECTION("KDTree vs Brute Force - Parallel")
224+
{
225+
auto kdtree = kdtree_t<data_type>{};
226+
auto bfknn_parallel = bfknn_parallel_t<data_type>{};
227+
228+
benchmark_norm_computation("KDTree", cloud, kdtree, num_neighbors, true);
229+
benchmark_norm_computation("Brute Force Parallel", cloud, bfknn_parallel, num_neighbors, false);
230+
}
231+
}
232+
233+
TEST_CASE("PCA Normal Estimation - Neighbor Count Impact", "[benchmark][pcl][norm]")
234+
{
235+
using data_type = float;
236+
constexpr std::size_t num_points = 6000;
237+
238+
auto cloud = generate_benchmark_cloud<data_type>(num_points);
239+
INFO("Testing neighbor count impact with " << cloud.size() << " points");
240+
241+
SECTION("Sequential - Different Neighbor Counts")
242+
{
243+
auto kdtree_5 = kdtree_t<data_type>{};
244+
auto kdtree_10 = kdtree_t<data_type>{};
245+
auto kdtree_20 = kdtree_t<data_type>{};
246+
auto kdtree_30 = kdtree_t<data_type>{};
247+
248+
benchmark_norm_computation("5 Neighbors", cloud, kdtree_5, 5, false);
249+
benchmark_norm_computation("10 Neighbors", cloud, kdtree_10, 10, false);
250+
benchmark_norm_computation("20 Neighbors", cloud, kdtree_20, 20, false);
251+
benchmark_norm_computation("30 Neighbors", cloud, kdtree_30, 30, false);
252+
}
253+
254+
SECTION("Parallel - Different Neighbor Counts")
255+
{
256+
auto kdtree_5 = kdtree_t<data_type>{};
257+
auto kdtree_10 = kdtree_t<data_type>{};
258+
auto kdtree_20 = kdtree_t<data_type>{};
259+
auto kdtree_30 = kdtree_t<data_type>{};
260+
261+
benchmark_norm_computation("5 Neighbors", cloud, kdtree_5, 5, true);
262+
benchmark_norm_computation("10 Neighbors", cloud, kdtree_10, 10, true);
263+
benchmark_norm_computation("20 Neighbors", cloud, kdtree_20, 20, true);
264+
benchmark_norm_computation("30 Neighbors", cloud, kdtree_30, 30, true);
265+
}
266+
}
267+
268+
TEST_CASE("PCA Normal Estimation - Parallel Speedup Analysis", "[benchmark][pcl][norm]")
269+
{
270+
using data_type = float;
271+
constexpr std::size_t num_neighbors = 15;
272+
273+
// Test with different cloud sizes to analyze scaling
274+
std::vector<std::size_t> cloud_sizes = {2000, 4000, 8000, 15000, 30000};
275+
276+
for (auto size : cloud_sizes) {
277+
DYNAMIC_SECTION("Cloud Size: " << size << " points") {
278+
auto cloud = generate_benchmark_cloud<data_type>(size);
279+
INFO("Analyzing parallel speedup with " << cloud.size() << " points");
280+
281+
auto kdtree1 = kdtree_t<data_type>{};
282+
auto kdtree2 = kdtree_t<data_type>{};
283+
284+
std::string size_label = "Size " + std::to_string(size);
285+
benchmark_norm_computation(size_label, cloud, kdtree1, num_neighbors, false);
286+
benchmark_norm_computation(size_label, cloud, kdtree2, num_neighbors, true);
287+
}
288+
}
289+
}

src/include/cpp-toolbox/pcl/filters/impl/voxel_grid_downsampling_impl.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ void voxel_grid_downsampling_t<DataType>::compute_point_cloud_bounds()
9090
}
9191

9292
// 使用现有的 minmax 工具计算点云边界
93-
constexpr std::size_t kParallelThreshold = 1024;
93+
constexpr std::size_t k_parallel_threshold = 1024;
9494

9595
// 计算点云边界
96-
auto bounds = m_enable_parallel && m_cloud->size() > kParallelThreshold
96+
auto bounds = m_enable_parallel && m_cloud->size() > k_parallel_threshold
9797
? toolbox::types::calculate_minmax_parallel(*m_cloud)
9898
: toolbox::types::calculate_minmax(*m_cloud);
9999

@@ -374,7 +374,7 @@ void voxel_grid_downsampling_t<DataType>::filter_impl(point_cloud_ptr output)
374374
}
375375

376376
// 定义常量
377-
constexpr std::size_t kParallelThreshold = 1024;
377+
constexpr std::size_t k_parallel_threshold = 1024;
378378
constexpr std::size_t kMaxVoxelsPerThread = 1000;
379379

380380
const std::size_t total_points = m_cloud->size();
@@ -383,7 +383,7 @@ void voxel_grid_downsampling_t<DataType>::filter_impl(point_cloud_ptr output)
383383

384384
// 确定线程数量
385385
const std::size_t num_threads =
386-
m_enable_parallel && total_points > kParallelThreshold
386+
m_enable_parallel && total_points > k_parallel_threshold
387387
? std::thread::hardware_concurrency()
388388
: 1;
389389

@@ -403,7 +403,7 @@ void voxel_grid_downsampling_t<DataType>::filter_impl(point_cloud_ptr output)
403403
}
404404

405405
// 并行或串行处理点云
406-
if (m_enable_parallel && total_points > kParallelThreshold) {
406+
if (m_enable_parallel && total_points > k_parallel_threshold) {
407407
// 计算每个线程处理的点数
408408
const std::size_t points_per_thread =
409409
(total_points + num_threads - 1) / num_threads;
@@ -485,7 +485,7 @@ void voxel_grid_downsampling_t<DataType>::filter_impl(point_cloud_ptr output)
485485
output->intensity = m_cloud->intensity;
486486

487487
// 并行或串行计算质心
488-
if (m_enable_parallel && num_voxels > kParallelThreshold) {
488+
if (m_enable_parallel && num_voxels > k_parallel_threshold) {
489489
std::vector<std::future<void>> futures;
490490
futures.reserve(num_threads);
491491

src/include/cpp-toolbox/pcl/knn/bfknn_parallel.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#pragma once
22

3-
#include <cpp-toolbox/pcl/knn/base_knn.hpp>
43
#include <cpp-toolbox/concurrent/parallel.hpp>
4+
#include <cpp-toolbox/pcl/knn/base_knn.hpp>
55

66
namespace toolbox::pcl
77
{
88

99
template<typename DataType>
10-
class CPP_TOOLBOX_EXPORT bfknn_parallel_t : public base_knn_t<bfknn_parallel_t<DataType>, DataType>
10+
class CPP_TOOLBOX_EXPORT bfknn_parallel_t
11+
: public base_knn_t<bfknn_parallel_t<DataType>, DataType>
1112
{
1213
public:
1314
using data_type = DataType;
@@ -38,7 +39,10 @@ class CPP_TOOLBOX_EXPORT bfknn_parallel_t : public base_knn_t<bfknn_parallel_t<D
3839
std::vector<data_type>& distances);
3940

4041
void enable_parallel(bool enable) { m_parallel_enabled = enable; }
41-
[[nodiscard]] bool is_parallel_enabled() const noexcept { return m_parallel_enabled; }
42+
[[nodiscard]] bool is_parallel_enabled() const noexcept
43+
{
44+
return m_parallel_enabled;
45+
}
4246

4347
private:
4448
data_type compute_distance(const point_t<data_type>& p1,
@@ -48,7 +52,7 @@ class CPP_TOOLBOX_EXPORT bfknn_parallel_t : public base_knn_t<bfknn_parallel_t<D
4852
point_cloud_ptr m_cloud;
4953
metric_type_t m_metric = metric_type_t::euclidean;
5054
bool m_parallel_enabled = true;
51-
static constexpr std::size_t kParallelThreshold = 1024;
55+
static constexpr std::size_t k_parallel_threshold = 1024;
5256
};
5357

5458
} // namespace toolbox::pcl

0 commit comments

Comments
 (0)