Skip to content

Commit b36da8a

Browse files
committed
Change parallelism paradigm to be deterministic
1 parent 245c73b commit b36da8a

File tree

3 files changed

+77
-26
lines changed

3 files changed

+77
-26
lines changed

modules/tracker/rbt/src/features/vpRBDenseDepthTracker.cpp

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,22 @@ void vpRBDenseDepthTracker::extractFeatures(const vpRBFeatureTrackerInput &frame
6767
m_depthPoints.clear();
6868
m_depthPoints.reserve(static_cast<size_t>(bb.getArea() / (m_step * m_step * 2)));
6969

70+
std::vector<std::vector<vpDepthPoint>> pointsPerThread;
71+
#ifdef VISP_HAVE_OPENMP
72+
const unsigned int numThreads = omp_get_num_threads();
73+
#else
74+
const unsigned int numThreads = 1;
75+
#endif
76+
pointsPerThread.resize(numThreads);
7077
#ifdef VISP_HAVE_OPENMP
7178
#pragma omp parallel
7279
#endif
7380
{
81+
#ifdef VISP_HAVE_OPENMP
82+
unsigned int threadIdx = omp_get_thread_num();
83+
#else
84+
unsigned int threadIdx = 1;
85+
#endif
7486
vpDepthPoint point;
7587
vpColVector cameraRay(3);
7688
#ifdef VISP_HAVE_OPENMP
@@ -132,13 +144,10 @@ void vpRBDenseDepthTracker::extractFeatures(const vpRBFeatureTrackerInput &frame
132144
}
133145
}
134146

135-
// If we use openmp, add to the global vector. If we're not using openmp, no need to do so
136-
#ifdef VISP_HAVE_OPENMP
137-
#pragma omp critical
138-
{
139-
m_depthPoints.insert(m_depthPoints.end(), localPoints.begin(), localPoints.end());
140-
}
141-
#endif
147+
pointsPerThread[threadIdx] = std::move(localPoints);
148+
}
149+
for (const std::vector<vpDepthPoint> &points: pointsPerThread) {
150+
m_depthPoints.insert(m_depthPoints.end(), std::make_move_iterator(points.begin()), std::make_move_iterator(points.end()));
142151
}
143152
m_depthPointSet.build(m_depthPoints);
144153

modules/tracker/rbt/src/features/vpRBSilhouetteCCDTracker.cpp

Lines changed: 45 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -166,10 +166,22 @@ void vpRBSilhouetteCCDTracker::extractFeatures(const vpRBFeatureTrackerInput &fr
166166
const vpHomogeneousMatrix cMo = frame.renders.cMo;
167167
const vpHomogeneousMatrix oMc = cMo.inverse();
168168

169+
std::vector<std::vector<vpRBSilhouetteControlPoint>> pointsPerThread;
170+
#ifdef VISP_HAVE_OPENMP
171+
pointsPerThread.resize(omp_get_num_threads());
172+
#else
173+
pointsPerThread.resize(1);
174+
#endif
175+
169176
#ifdef VISP_HAVE_OPENMP
170177
#pragma omp parallel
171178
#endif
172179
{
180+
#ifdef VISP_HAVE_OPENMP
181+
unsigned int threadIdx = omp_get_thread_num();
182+
#else
183+
unsigned int threadIdx = 0;
184+
#endif
173185
std::vector<vpRBSilhouetteControlPoint> localControlPoints;
174186
#ifdef VISP_HAVE_OPENMP
175187
#pragma omp for nowait
@@ -205,14 +217,21 @@ void vpRBSilhouetteCCDTracker::extractFeatures(const vpRBFeatureTrackerInput &fr
205217
}
206218
localControlPoints.push_back(std::move(pccd));
207219
}
208-
#ifdef VISP_HAVE_OPENMP
209-
#pragma omp critical
210-
#endif
220+
221+
211222
{
212-
m_controlPoints.insert(m_controlPoints.end(), localControlPoints.begin(), localControlPoints.end());
223+
pointsPerThread[threadIdx] = std::move(localControlPoints);
213224
}
214225
}
226+
unsigned int numElements = 0;
227+
for (const std::vector<vpRBSilhouetteControlPoint> &points: pointsPerThread) {
228+
numElements += points.size();
229+
}
215230

231+
m_controlPoints.reserve(numElements);
232+
for (const std::vector<vpRBSilhouetteControlPoint> &points: pointsPerThread) {
233+
m_controlPoints.insert(m_controlPoints.end(), points.begin(), points.end());
234+
}
216235

217236
if (m_maxPoints > 0 && m_controlPoints.size() > m_maxPoints) {
218237
std::vector<size_t> keptIndices(m_maxPoints);
@@ -798,13 +817,28 @@ void vpRBSilhouetteCCDTracker::computeErrorAndInteractionMatrix()
798817
m_weights[i * 2 * normal_points_number * 3 + j] = weightPerPoint[i];
799818
}
800819
}
820+
std::vector<vpColVector> gradientPerThread;
821+
std::vector<vpMatrix> hessianPerThread;
822+
#ifdef VISP_HAVE_OPENMP
823+
unsigned int numThreads = omp_get_num_threads();
824+
#else
825+
unsigned int numThreads = 1;
826+
#endif
827+
828+
gradientPerThread.resize(omp_get_num_threads());
829+
hessianPerThread.resize(omp_get_num_threads());
801830

802831
m_gradient = 0.0;
803832
m_hessian = 0.0;
804833
#ifdef VISP_HAVE_OPENMP
805834
#pragma omp parallel
806835
#endif
807836
{
837+
#ifdef VISP_HAVE_OPENMP
838+
unsigned int threadIdx = omp_get_thread_num();
839+
#else
840+
unsigned int threadIdx = 1;
841+
#endif
808842
vpColVector localGradient(m_gradient.getRows(), 0.0);
809843
vpMatrix localHessian(m_hessian.getRows(), m_hessian.getCols(), 0.0);
810844

@@ -826,15 +860,15 @@ void vpRBSilhouetteCCDTracker::computeErrorAndInteractionMatrix()
826860
}
827861
}
828862
}
829-
#ifdef VISP_HAVE_OPENMP
830-
#pragma omp critical
831-
#endif
832-
{
833-
m_gradient += localGradient;
834-
m_hessian += localHessian;
835-
}
863+
864+
gradientPerThread[threadIdx] = localGradient;
865+
hessianPerThread[threadIdx] = localHessian;
836866
}
837867

868+
for (unsigned int i = 0; i < gradientPerThread.size(); ++i) {
869+
m_gradient += gradientPerThread[i];
870+
m_hessian += hessianPerThread[i];
871+
}
838872

839873
m_LTL = m_hessian;
840874
m_LTR = -m_gradient;
@@ -864,7 +898,6 @@ void vpRBSilhouetteCCDTracker::computeErrorAndInteractionMatrix()
864898
m_LTL = 0;
865899
m_LTR = 0;
866900

867-
868901
std::cerr << e.what() << std::endl;
869902
}
870903
}

modules/tracker/rbt/src/vo/vpPointMap.cpp

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -130,11 +130,23 @@ void vpPointMap::getVisiblePoints(const unsigned int h, const unsigned int w, co
130130
vpMatrix cX(m_X.getRows(), m_X.getCols());
131131
vpMatrix::mult2Matrices(m_X, cRw.t(), cX);
132132

133+
std::vector<std::vector<int>> indicesPerThread;
134+
#ifdef VISP_HAVE_OPENMP
135+
const unsigned int numThreads = omp_get_num_threads();
136+
#else
137+
const unsigned int numThreads = 1;
138+
#endif
139+
indicesPerThread.resize(numThreads);
133140

134141
#ifdef VISP_HAVE_OPENMP
135142
#pragma omp parallel
136143
#endif
137144
{
145+
#ifdef VISP_HAVE_OPENMP
146+
unsigned int threadIdx = omp_get_thread_num();
147+
#else
148+
unsigned int threadIdx = 1;
149+
#endif
138150
std::vector<int> localIndices;
139151
double u, v;
140152

@@ -158,15 +170,12 @@ void vpPointMap::getVisiblePoints(const unsigned int h, const unsigned int w, co
158170
if (fabs(Z - depth[vint][uint]) > m_maxDepthErrorVisible) {
159171
continue;
160172
}
161-
162173
localIndices.push_back(i);
163174
}
164-
#ifdef VISP_HAVE_OPENMP
165-
#pragma omp critical
166-
#endif
167-
{
168-
indices.insert(indices.end(), std::make_move_iterator(localIndices.begin()), std::make_move_iterator(localIndices.end()));
169-
}
175+
indicesPerThread[threadIdx] = std::move(localIndices);
176+
}
177+
for (const std::vector<int> &indicesPart: indicesPerThread) {
178+
indices.insert(indices.end(), std::make_move_iterator(indicesPart.begin()), std::make_move_iterator(indicesPart.end()));
170179
}
171180
}
172181

0 commit comments

Comments
 (0)