laghos changes (#1138)

rfhaque · Riyaz Haque · pearce8 · web-flow · commit 6cfa83754a72 · 2026-01-05T10:04:38.000-06:00
* Changes to laghos

* Changes

* enable caliper blocks in laghos source

* laghos input params

* Update git url to main repo

* Use fixed number of time steps

* Increase end time so that code runs for max steps

* Add weak scaling

* Fix weak scaling params

* lint

* Add support for nonconforming meshes

* Default nonconforming true

* Fixes

* Change parallel refinement value

* Add throughput configs for different orders

* Add rocm-6.4.3

* Add sedov workload

* lint

* cuda fixes

* Update .codespellignore

---------

Co-authored-by: Riyaz Haque <your-email@example.com>
Co-authored-by: pearce8 <pearce8@llnl.gov>
diff --git a/.codespellignore b/.codespellignore
@@ -1,2 +1,3 @@
 cachable
-parma
+ot
+parma
diff --git a/experiments/laghos/experiment.py b/experiments/laghos/experiment.py
@@ -17,14 +17,22 @@ class Laghos(
     MpiOnlyExperiment,
     CudaExperiment,
     ROCmExperiment,
-    Scaling(ScalingMode.Strong),
+    Scaling(ScalingMode.Strong, ScalingMode.Weak, ScalingMode.Throughput),
     Caliper,
 ):
 
     variant(
         "workload",
-        default="triplept",
-        description="triplept or other problem",
+        default="sedov",
+        values=("sedov", "triplept"),
+        description="problem type",
+    )
+
+    variant(
+        "order",
+        default="linear",
+        values=("linear", "quadratic", "cubic"),
+        description="solution order",
     )
 
     variant(
@@ -40,38 +48,172 @@ class Laghos(
         description="Use GPU-aware MPI",
     )
 
+    variant(
+        "nc",
+        default=False,
+        values=(True, False),
+        description="nonconforming or conforming",
+    )
+
     maintainers("wdhawkins")
 
-    def compute_applications_section(self):
-        # "zones" defined from mesh file, we are hardcoding it here
-        self.add_experiment_variable("nx", 2, True)
-        self.add_experiment_variable("ny", 2, True)
-        self.add_experiment_variable("nz", 2, True)
-        self.add_experiment_variable("tf", 0.0033, True)
-        self.add_experiment_variable("zones", 1024, True)
+    def generate_perf_specs(self):
+        problem_spec = {
+            "nx": 1,
+            "ny": 1,
+            "nz": 1,
+            "pool_size": 16,
+            "ms": 250,
+            "tf": 10000,
+            "resource_count": 4,
+            "strong": None,
+            "weak": None,
+            "throughput": None,
+        }
+        # Add problem specs as needed here
+        if self.spec.satisfies("+throughput"):
+            if self.spec.satisfies("order=linear"):
+                problem_spec["rs"] = [4, 4, 4]
+                problem_spec["rp"] = [2, 3, 4]
+            elif self.spec.satisfies("order=quadratic"):
+                problem_spec["rs"] = [4, 4, 4]
+                problem_spec["rp"] = [1, 2, 3]
+            elif self.spec.satisfies("order=cubic"):
+                problem_spec["rs"] = [4, 4, 4]
+                problem_spec["rp"] = [1, 2, 3]
+        elif self.spec.satisfies("+strong"):
+            problem_spec["strong"] = (
+                lambda var, itr, dim, scaling_factor: var.val(dim) * scaling_factor
+            )
+            if self.spec.satisfies("order=linear"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 3
+            elif self.spec.satisfies("order=quadratic"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 2
+            elif self.spec.satisfies("order=cubic"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 1
+        elif self.spec.satisfies("+weak"):
+            problem_spec["nx"] = [1, 2, 3, 4, 5, 6]
+            problem_spec["ny"] = [1, 2, 3, 4, 5, 6]
+            problem_spec["nz"] = [1, 2, 3, 4, 5, 6]
+            problem_spec["resource_count"] = [4, 32, 108, 256, 500, 864]
+            if self.spec.satisfies("order=linear"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 3
+            elif self.spec.satisfies("order=quadratic"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 2
+            elif self.spec.satisfies("order=cubic"):
+                problem_spec["rs"] = 4
+                problem_spec["rp"] = 1
+        else:
+            problem_spec["rs"] = 4
+            problem_spec["rp"] = 1
 
-        # resource_count is the number of resources used for this experiment:
-        self.add_experiment_variable("resource_count", 1, False)
+        self.add_experiment_variable("nx", problem_spec["nx"], True)
+        self.add_experiment_variable("ny", problem_spec["ny"], True)
+        self.add_experiment_variable("nz", problem_spec["nz"], True)
+        self.add_experiment_variable("rs", problem_spec["rs"], True)
+        self.add_experiment_variable("rp", problem_spec["rp"], True)
+        self.add_experiment_variable("ms", problem_spec["ms"], True)
+        self.add_experiment_variable("tf", problem_spec["tf"], True)
 
-        # Set the variables required by the experiment
-        self.set_required_variables(
-            n_resources="{resource_count}",
-            process_problem_size="{zones} / {n_resources}",
-            total_problem_size="{zones}",
+        self.add_experiment_variable(
+            "resource_count", problem_spec["resource_count"], True
         )
 
-        # Register the scaling variables and their respective scaling functions
-        # required to correctly scale the experiment for the given scaliing policy
-        # Strong scaling scales up resource_count by the specified scaling_factor
+        # Per-process size (in zones) in each dimension
+        self.add_experiment_variable("zones", "{nx}*{ny}*{nz}*(8**({rs}+{rp}))", False)
+
+        # Umpire device pool size
+        self.add_experiment_variable("pool", problem_spec["pool_size"], False)
+
         self.register_scaling_config(
             {
                 ScalingMode.Strong: {
-                    "resource_count": lambda var, itr, dim, scaling_factor: var.val(dim)
-                    * scaling_factor,
+                    "resource_count": problem_spec["strong"],
+                },
+                ScalingMode.Weak: {
+                    "resource_count": problem_spec["weak"],
+                },
+                ScalingMode.Throughput: {
+                    "resource_count": problem_spec["throughput"],
                 },
             }
         )
 
+    def compute_applications_section(self):
+        if self.spec.satisfies("exec_mode=perf"):
+            self.generate_perf_specs()
+        else:
+            # "zones" defined from mesh file, we are hardcoding it here
+            self.add_experiment_variable("nx", 1, True)
+            self.add_experiment_variable("ny", 1, True)
+            self.add_experiment_variable("nz", 1, True)
+            self.add_experiment_variable("rs", 3, True)
+            self.add_experiment_variable("rp", 2, True)
+            self.add_experiment_variable("ms", 250, True)
+            self.add_experiment_variable("tf", 10000, True)
+            self.add_experiment_variable(
+                "zones", "{nx}*{ny}*{nz}*(8**({rs}+{rp}))", False
+            )
+            self.add_experiment_variable("pool", 16, True)
+            # resource_count is the number of resources used for this experiment:
+            self.add_experiment_variable("resource_count", 1, False)
+
+            # Register the scaling variables and their respective scaling functions
+            # required to correctly scale the experiment for the given scaling policy
+            # Strong scaling scales up resource_count by the specified scaling_factor
+            self.register_scaling_config(
+                {
+                    ScalingMode.Strong: {
+                        "resource_count": lambda var, itr, dim, scaling_factor: var.val(
+                            dim
+                        )
+                        * scaling_factor,
+                    },
+                    ScalingMode.Weak: {
+                        "resource_count": None,
+                    },
+                    ScalingMode.Throughput: {
+                        "resource_count": None,
+                    },
+                }
+            )
+
+        if self.spec.satisfies("order=linear"):
+            self.add_experiment_variable("order", "linear", True)
+            self.add_experiment_variable("ok", 1, False)
+            self.add_experiment_variable("ot", 0, False)
+        elif self.spec.satisfies("order=quadratic"):
+            self.add_experiment_variable("order", "quadratic", True)
+            self.add_experiment_variable("ok", 2, False)
+            self.add_experiment_variable("ot", 1, False)
+        elif self.spec.satisfies("order=cubic"):
+            self.add_experiment_variable("order", "cubic", True)
+            self.add_experiment_variable("ok", 3, False)
+            self.add_experiment_variable("ot", 2, False)
+        else:
+            self.add_experiment_variable("order", "linear", True)
+            self.add_experiment_variable("ok", 1, False)
+            self.add_experiment_variable("ot", 0, False)
+
+        if self.spec.satisfies("+nc"):
+            self.add_experiment_variable("nc_type", "nonconforming", True)
+            self.add_experiment_variable("nc", "-nc", False)
+        else:
+            self.add_experiment_variable("nc_type", "conforming", True)
+            self.add_experiment_variable("nc", "-no-nc", False)
+
+        # Set the variables required by the experiment
+        self.set_required_variables(
+            n_resources="{resource_count}",
+            process_problem_size="{zones} / {n_resources}",
+            total_problem_size="{zones}",
+        )
+
         if self.spec.satisfies("+cuda"):
             self.add_experiment_variable("device", "cuda", True)
         elif self.spec.satisfies("+rocm"):
diff --git a/lib/benchpark/scaling.py b/lib/benchpark/scaling.py
@@ -126,9 +126,10 @@ def scale_params(self, scaling_config):
         for itr in range(num_exprs):
             dim = (start_dim + itr) % ndims
             for var_name, scaling_func in scaling_config.items():
-                getattr(self.expr_vars, var_name).scale_dim(
-                    itr, dim, scaling_func, scaling_factor
-                )
+                if scaling_func:
+                    getattr(self.expr_vars, var_name).scale_dim(
+                        itr, dim, scaling_func, scaling_factor
+                    )
 
     BaseScaling.scale_params = scale_params
 
diff --git a/lib/benchpark/variables.py b/lib/benchpark/variables.py
@@ -151,8 +151,19 @@ def val(self, key):
 
     def scale_dim(self, itr, dim, scaling_func, sf):
         key = self._dims[0] if self.ndims == 1 else self._dims[dim]
-        for k in self._dims:
-            if k == key:
-                self._var[k].append(scaling_func(self, itr, k, sf))
-            else:
-                self._var[k].append(self.val(k))
+
+        next_val = scaling_func(self, itr, key, sf)
+
+        if not next_val:
+            return
+        elif isinstance(next_val, list):
+            idx = 0
+            for k in self._dims:
+                self._var[k].append(next_val[idx])
+                idx += 1
+        else:
+            for k in self._dims:
+                if k == key:
+                    self._var[k].append(scaling_func(self, itr, k, sf))
+                else:
+                    self._var[k].append(self.val(k))
diff --git a/repo/laghos/application.py b/repo/laghos/application.py
@@ -19,57 +19,102 @@ class Laghos(ExecutableApplication):
             'lagrangian','spatial-discretization','unstructured-grid',
             'network-latency-bound','network-collectives','unstructured-grid']
 
-    executable('prob', 'laghos -p {problem} -m {mesh} -nx {nx} -ny {ny} -nz {nz} -rs {rs} -rp {rp} -ms {ms} --fom {gam} -d {device} {assembly} -tf {tf}', use_mpi=True)
+    executable('triplept', 'laghos' +
+                       ' -p 3' +
+                       ' -m {mesh}' +
+                       ' -nx {nx} -ny {ny} -nz {nz}' +
+                       ' -rs {rs} -rp {rp}' +
+                       ' -ms {ms}' +
+                       ' -ok {ok} -ot {ot} -oq {oq}' +
+                       ' {nc} --mem --fom {gam}' +
+                       ' --dev-pool-size {pool}' +
+                       ' -d {device}' +
+                       ' {assembly}',
+                       use_mpi=True)
 
-    workload('triplept', executables=['prob'])
+    executable('sedov', 'laghos' +
+                       ' -p 1' +
+                       ' -m {mesh}' +
+                       ' -nx {nx} -ny {ny} -nz {nz}' +
+                       ' -rs {rs} -rp {rp}' +
+                       ' -ms {ms}' +
+                       ' -ok {ok} -ot {ot} -oq {oq}' +
+                       ' {nc} --mem --fom {gam}' +
+                       ' --dev-pool-size {pool}' +
+                       ' -d {device}' +
+                       ' {assembly}',
+                       use_mpi=True)
+
+    workload('triplept', executables=['triplept'])
+    workload('sedov', executables=['sedov'])
 
     workload_variable('mesh', default='default',
             description='mesh file',
-            workloads=['triplept'])
+            workloads=['*'])
 
     workload_variable('nx', default='2',
             description='Elements in x-dimension',
-            workloads=['triplept'])
+            workloads=['*'])
         
     workload_variable('ny', default='2',
             description='Elements in y-dimension',
-            workloads=['triplept'])
+            workloads=['*'])
         
     workload_variable('nz', default='2',
             description='Elements in z-dimension',
-            workloads=['triplept'])
+            workloads=['*'])
         
     workload_variable('problem', default='3',
             description='problem number',
-            workloads=['triplept'])
+            workloads=['*'])
         
     workload_variable('rs', default='2',
             description='number of serial refinements',
-            workloads=['triplept'])
+            workloads=['*'])
     
     workload_variable('rp', default='0',
             description='number of parallel refinements',
-            workloads=['triplept'])
+            workloads=['*'])
     
     workload_variable('ms', default='250',
             description='max number of steps',
-            workloads=['triplept'])
+            workloads=['*'])
+
+    workload_variable('ok', default='1',
+            description='Order (degree) of the kinematic finite element space',
+            workloads=['*'])
+
+    workload_variable('ot', default='0',
+            description='Order (degree) of the thermodynamic finite element space',
+            workloads=['*'])
+
+    workload_variable('oq', default='-1',
+            description='Order  of the integration rule',
+            workloads=['*'])
+
+    workload_variable('pool', default='4',
+        description='Device pool size',
+        workloads=['*'])
 
     workload_variable('device', default='cpu',
         description='cpu, cuda or hip',
-        workloads=['triplept'])
+        workloads=['*'])
 
     workload_variable('gam', default='--no-gpu-aware-mpi',
         description='--gpu-aware-mpi or --no-gpu-aware-mpi',
-        workloads=['triplept'])
+        workloads=['*'])
+
+    workload_variable('nc', default='-nc',
+        description='Use non-conforming meshes. Requires a 2D or 3D mesh.',
+        workloads=['*'])
 
     workload_variable('assembly', default='-pa',
             description='Activate 1D tensor-based assembly (partial assembly).',
-            workloads=['triplept'])
+            workloads=['*'])
     
-    workload_variable('tf', default='0.6',
+    workload_variable('tf', default='0.8',
             description='Final time; start time is 0.',
-            workloads=['triplept'])
+            workloads=['*'])
     
     figure_of_merit('Major kernels total time',
                     log_file='{experiment_run_dir}/{experiment_name}.out',
diff --git a/repo/laghos/package.py b/repo/laghos/package.py
diff --git a/repo/mfem/package.py b/repo/mfem/package.py

-Original file line number
+Diff line change
@@ @@ -1,2 +1,3 @@ @@
 cachable
 -parma
 +ot
 +parma