Skip to content

Commit 60eb10d

Browse files
sbryngelsonclaude
andcommitted
Fix O4 stencil support and CodeRabbit review issues
- Fix unreachable O4 skew path: reorder if-else chain to check use_O4 && use_skew before plain use_skew (was dead code) - Fix multi-layer ghost cell Dirichlet BCs: use Ng+g indexing for proper linear extrapolation through boundary - Fix config_ vs config parameter bug in space_order safety check - Fix RHS norm computation: use Ng-based loop bounds instead of hardcoded 1..Nx (supports O4 with Ng=2) - Fix V-cycle graph config: use grid.Ng instead of hardcoded 1 - Add perf_bench for branch performance comparisons Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent de074c0 commit 60eb10d

File tree

3 files changed

+74
-68
lines changed

3 files changed

+74
-68
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,10 @@ if(BUILD_TESTS)
548548
target_link_libraries(bench_mg_cuda_graphs PRIVATE nn_cfd_core)
549549
endif()
550550

551+
# Performance benchmark for branch comparisons
552+
add_executable(perf_bench perf_bench.cpp)
553+
target_link_libraries(perf_bench PRIVATE nn_cfd_core)
554+
551555
# =========================================================================
552556
# Convenience targets for running tests
553557
# =========================================================================

src/poisson_solver_multigrid.cpp

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -230,17 +230,17 @@ void MultigridPoissonSolver::apply_bc(int level) {
230230
u_ptr[idx_lo] = u_ptr[j * stride + Nx + g];
231231
} else if (bc_x_lo == 1) { // Neumann (zero gradient)
232232
u_ptr[idx_lo] = u_ptr[j * stride + Ng];
233-
} else { // Dirichlet
234-
u_ptr[idx_lo] = 2.0 * dval - u_ptr[j * stride + Ng];
233+
} else { // Dirichlet - mirror ghost g through boundary using interior cell Ng+g
234+
u_ptr[idx_lo] = 2.0 * dval - u_ptr[j * stride + (Ng + g)];
235235
}
236236

237237
// Right boundary - periodic wraps to left interior
238238
if (bc_x_hi == 2) { // Periodic
239239
u_ptr[idx_hi] = u_ptr[j * stride + Ng + g];
240240
} else if (bc_x_hi == 1) { // Neumann (zero gradient)
241241
u_ptr[idx_hi] = u_ptr[j * stride + (Nx + Ng - 1)];
242-
} else { // Dirichlet
243-
u_ptr[idx_hi] = 2.0 * dval - u_ptr[j * stride + (Nx + Ng - 1)];
242+
} else { // Dirichlet - mirror ghost g through boundary using interior cell (Nx+Ng-1-g)
243+
u_ptr[idx_hi] = 2.0 * dval - u_ptr[j * stride + (Nx + Ng - 1 - g)];
244244
}
245245
}
246246
}
@@ -257,17 +257,17 @@ void MultigridPoissonSolver::apply_bc(int level) {
257257
u_ptr[idx_lo] = u_ptr[(Ny + g) * stride + i];
258258
} else if (bc_y_lo == 1) { // Neumann (zero gradient)
259259
u_ptr[idx_lo] = u_ptr[Ng * stride + i];
260-
} else { // Dirichlet
261-
u_ptr[idx_lo] = 2.0 * dval - u_ptr[Ng * stride + i];
260+
} else { // Dirichlet - mirror ghost g through boundary using interior cell Ng+g
261+
u_ptr[idx_lo] = 2.0 * dval - u_ptr[(Ng + g) * stride + i];
262262
}
263263

264264
// Top boundary - periodic wraps to bottom interior
265265
if (bc_y_hi == 2) { // Periodic
266266
u_ptr[idx_hi] = u_ptr[(Ng + g) * stride + i];
267267
} else if (bc_y_hi == 1) { // Neumann (zero gradient)
268268
u_ptr[idx_hi] = u_ptr[(Ny + Ng - 1) * stride + i];
269-
} else { // Dirichlet
270-
u_ptr[idx_hi] = 2.0 * dval - u_ptr[(Ny + Ng - 1) * stride + i];
269+
} else { // Dirichlet - mirror ghost g through boundary using interior cell (Ny+Ng-1-g)
270+
u_ptr[idx_hi] = 2.0 * dval - u_ptr[(Ny + Ng - 1 - g) * stride + i];
271271
}
272272
}
273273
}
@@ -345,17 +345,17 @@ void MultigridPoissonSolver::apply_bc(int level) {
345345
u_ptr[idx_lo] = u_ptr[k * plane_stride + j * stride + Nx + g];
346346
} else if (bc_x_lo == 1) {
347347
u_ptr[idx_lo] = u_ptr[k * plane_stride + j * stride + Ng];
348-
} else {
349-
u_ptr[idx_lo] = 2.0 * dval - u_ptr[k * plane_stride + j * stride + Ng];
348+
} else { // Dirichlet - mirror ghost g through boundary using interior cell Ng+g
349+
u_ptr[idx_lo] = 2.0 * dval - u_ptr[k * plane_stride + j * stride + (Ng + g)];
350350
}
351351

352352
// Right boundary - periodic wraps to left interior
353353
if (bc_x_hi == 2) {
354354
u_ptr[idx_hi] = u_ptr[k * plane_stride + j * stride + Ng + g];
355355
} else if (bc_x_hi == 1) {
356356
u_ptr[idx_hi] = u_ptr[k * plane_stride + j * stride + (Nx + Ng - 1)];
357-
} else {
358-
u_ptr[idx_hi] = 2.0 * dval - u_ptr[k * plane_stride + j * stride + (Nx + Ng - 1)];
357+
} else { // Dirichlet - mirror ghost g through boundary using interior cell (Nx+Ng-1-g)
358+
u_ptr[idx_hi] = 2.0 * dval - u_ptr[k * plane_stride + j * stride + (Nx + Ng - 1 - g)];
359359
}
360360
}
361361
}
@@ -374,17 +374,17 @@ void MultigridPoissonSolver::apply_bc(int level) {
374374
u_ptr[idx_lo] = u_ptr[k * plane_stride + (Ny + g) * stride + i];
375375
} else if (bc_y_lo == 1) {
376376
u_ptr[idx_lo] = u_ptr[k * plane_stride + Ng * stride + i];
377-
} else {
378-
u_ptr[idx_lo] = 2.0 * dval - u_ptr[k * plane_stride + Ng * stride + i];
377+
} else { // Dirichlet - mirror ghost g through boundary using interior cell Ng+g
378+
u_ptr[idx_lo] = 2.0 * dval - u_ptr[k * plane_stride + (Ng + g) * stride + i];
379379
}
380380

381381
// Top boundary - periodic wraps to bottom interior
382382
if (bc_y_hi == 2) {
383383
u_ptr[idx_hi] = u_ptr[k * plane_stride + (Ng + g) * stride + i];
384384
} else if (bc_y_hi == 1) {
385385
u_ptr[idx_hi] = u_ptr[k * plane_stride + (Ny + Ng - 1) * stride + i];
386-
} else {
387-
u_ptr[idx_hi] = 2.0 * dval - u_ptr[k * plane_stride + (Ny + Ng - 1) * stride + i];
386+
} else { // Dirichlet - mirror ghost g through boundary using interior cell (Ny+Ng-1-g)
387+
u_ptr[idx_hi] = 2.0 * dval - u_ptr[k * plane_stride + (Ny + Ng - 1 - g) * stride + i];
388388
}
389389
}
390390
}
@@ -403,17 +403,17 @@ void MultigridPoissonSolver::apply_bc(int level) {
403403
u_ptr[idx_lo] = u_ptr[(Nz + g) * plane_stride + j * stride + i];
404404
} else if (bc_z_lo == 1) {
405405
u_ptr[idx_lo] = u_ptr[Ng * plane_stride + j * stride + i];
406-
} else {
407-
u_ptr[idx_lo] = 2.0 * dval - u_ptr[Ng * plane_stride + j * stride + i];
406+
} else { // Dirichlet - mirror ghost g through boundary using interior cell Ng+g
407+
u_ptr[idx_lo] = 2.0 * dval - u_ptr[(Ng + g) * plane_stride + j * stride + i];
408408
}
409409

410410
// Front boundary - periodic wraps to back interior
411411
if (bc_z_hi == 2) {
412412
u_ptr[idx_hi] = u_ptr[(Ng + g) * plane_stride + j * stride + i];
413413
} else if (bc_z_hi == 1) {
414414
u_ptr[idx_hi] = u_ptr[(Nz + Ng - 1) * plane_stride + j * stride + i];
415-
} else {
416-
u_ptr[idx_hi] = 2.0 * dval - u_ptr[(Nz + Ng - 1) * plane_stride + j * stride + i];
415+
} else { // Dirichlet - mirror ghost g through boundary using interior cell (Nz+Ng-1-g)
416+
u_ptr[idx_hi] = 2.0 * dval - u_ptr[(Nz + Ng - 1 - g) * plane_stride + j * stride + i];
417417
}
418418
}
419419
}
@@ -1669,20 +1669,21 @@ int MultigridPoissonSolver::solve(const ScalarField& rhs, ScalarField& p, const
16691669
// Compute reference norms for relative tolerances (CPU path)
16701670
// ||b||_∞ = max|f| and ||b||_2 = sqrt(sum(f^2)) on finest level - store in member for diagnostics
16711671
auto& finest_cpu = *levels_[0];
1672+
const int Ng_f = finest_cpu.Ng; // Use actual Ng for correct interior indexing (O4 has Ng=2)
16721673
b_inf_ = 0.0;
16731674
double b_sum_sq = 0.0;
16741675
if (mesh_->is2D()) {
1675-
for (int j = 1; j <= finest_cpu.Ny; ++j) {
1676-
for (int i = 1; i <= finest_cpu.Nx; ++i) {
1676+
for (int j = Ng_f; j < Ng_f + finest_cpu.Ny; ++j) {
1677+
for (int i = Ng_f; i < Ng_f + finest_cpu.Nx; ++i) {
16771678
double val = finest_cpu.f(i, j);
16781679
b_inf_ = std::max(b_inf_, std::abs(val));
16791680
b_sum_sq += val * val;
16801681
}
16811682
}
16821683
} else {
1683-
for (int k = 1; k <= finest_cpu.Nz; ++k) {
1684-
for (int j = 1; j <= finest_cpu.Ny; ++j) {
1685-
for (int i = 1; i <= finest_cpu.Nx; ++i) {
1684+
for (int k = Ng_f; k < Ng_f + finest_cpu.Nz; ++k) {
1685+
for (int j = Ng_f; j < Ng_f + finest_cpu.Ny; ++j) {
1686+
for (int i = Ng_f; i < Ng_f + finest_cpu.Nx; ++i) {
16861687
double val = finest_cpu.f(i, j, k);
16871688
b_inf_ = std::max(b_inf_, std::abs(val));
16881689
b_sum_sq += val * val;
@@ -2279,7 +2280,7 @@ void MultigridPoissonSolver::initialize_vcycle_graph(int nu1, int nu2, int degre
22792280
cfg.Nx = grid.Nx;
22802281
cfg.Ny = grid.Ny;
22812282
cfg.Nz = grid.Nz;
2282-
cfg.Ng = 1;
2283+
cfg.Ng = grid.Ng; // Use actual Ng (finest level may have Ng=2 for O4 stencils)
22832284
cfg.dx2 = grid.dx * grid.dx;
22842285
cfg.dy2 = grid.dy * grid.dy;
22852286
cfg.dz2 = grid.dz * grid.dz;

0 commit comments

Comments
 (0)